1 /**********************************************************************
2 
3   pack.c -
4 
5   $Author: shyouhei $
6   created at: Thu Feb 10 15:17:05 JST 1994
7 
8   Copyright (C) 1993-2007 Yukihiro Matsumoto
9 
10 **********************************************************************/
11 
12 #include "ruby/encoding.h"
13 #include "internal.h"
14 #include <sys/types.h>
15 #include <ctype.h>
16 #include <errno.h>
17 #include <float.h>
18 
/*
 * Directive letters that accept the native-size modifiers '_' and '!'
 * (natstr) and the explicit endian modifiers '<' and '>' (endstr).
 *
 * It is intentional that the condition for natstr is HAVE_TRUE_LONG_LONG
 * instead of HAVE_LONG_LONG or LONG_LONG.
 * This means that q! and Q! always mean the standard long long type and
 * cause ArgumentError on platforms which have no long long type,
 * even if the platform has an implementation-specific 64-bit type.
 * This behavior is consistent with the documentation of pack/unpack.
 */
#ifdef HAVE_TRUE_LONG_LONG
static const char natstr[] = "sSiIlLqQjJ";
#else
static const char natstr[] = "sSiIlLjJ";
#endif
static const char endstr[] = "sSiIlLqQjJ";

/* Byte width used for the q/Q directives. */
#ifdef HAVE_TRUE_LONG_LONG
/* It is intentional to use long long instead of LONG_LONG. */
# define NATINT_LEN_Q NATINT_LEN(long long, 8)
#else
# define NATINT_LEN_Q 8
#endif

/*
 * NATINT_PACK is defined when at least one native integer type differs in
 * size from the fixed size of its directive, i.e. when the '_'/'!' modifier
 * can actually change the packed width.
 */
#if SIZEOF_SHORT != 2 || SIZEOF_LONG != 4 || (defined(HAVE_TRUE_LONG_LONG) && SIZEOF_LONG_LONG != 8)
# define NATINT_PACK
#endif
44 
#ifdef DYNAMIC_ENDIAN
/* for universal binary of NEXTSTEP and MacOS X */
/* useless since autoconf 2.63? */
/*
 * Determines the host byte order at run time.
 *
 * The first call sets the static int `init` to 1 and inspects its first
 * byte: a non-zero first byte means the least significant byte is stored
 * first (little endian).  The answer is cached in `endian_value` and
 * returned directly by later calls.
 *
 * Returns 1 on a big-endian host and 0 on a little-endian host.
 */
static int
is_bigendian(void)
{
    static int init = 0;
    static int endian_value;

    if (!init) {
        const char *first_byte;

        init = 1;
        first_byte = (const char *)&init;
        endian_value = first_byte[0] ? 0 : 1;
    }
    return endian_value;
}
# define BIGENDIAN_P() (is_bigendian())
#elif defined(WORDS_BIGENDIAN)
# define BIGENDIAN_P() 1
#else
# define BIGENDIAN_P() 0
#endif
66 
/*
 * NATINT_LEN(type, len): byte width to use for an integer directive.
 *
 * With NATINT_PACK the expansion reads a variable named `natint` that must
 * be in scope at the point of use: sizeof(type) when it is non-zero, the
 * fixed `len` otherwise.  Without NATINT_PACK the result is always
 * sizeof(type) and `len` is ignored.
 */
#ifdef NATINT_PACK
# define NATINT_LEN(type,len) (natint?(int)sizeof(type):(int)(len))
#else
# define NATINT_LEN(type,len) ((int)sizeof(type))
#endif
72 
/*
 * Unions giving access to the object representation of a float/double,
 * either as an unsigned integer of the same width (for byte swapping) or
 * as raw bytes (for appending to the output string).
 */
typedef union {
    float f;
    uint32_t u;
    char buf[4];
} FLOAT_SWAPPER;
typedef union {
    double d;
    uint64_t u;
    char buf[8];
} DOUBLE_SWAPPER;
/* swap32/swap64 are not defined in this part of the file. */
#define swapf(x) swap32(x)
#define swapd(x) swap64(x)

/*
 * Byte-order conversions on the integer image of a float (f) or double (d).
 * "n" is network (big-endian) order, "v" is VAX (little-endian) order and
 * "h" is host order; each macro swaps only when the host order differs
 * from the target order.
 */
#define rb_ntohf(x) (BIGENDIAN_P()?(x):swapf(x))
#define rb_ntohd(x) (BIGENDIAN_P()?(x):swapd(x))
#define rb_htonf(x) (BIGENDIAN_P()?(x):swapf(x))
#define rb_htond(x) (BIGENDIAN_P()?(x):swapd(x))
#define rb_htovf(x) (BIGENDIAN_P()?swapf(x):(x))
#define rb_htovd(x) (BIGENDIAN_P()?swapd(x):(x))
#define rb_vtohf(x) (BIGENDIAN_P()?swapf(x):(x))
#define rb_vtohd(x) (BIGENDIAN_P()?swapd(x):(x))

/*
 * FLOAT_CONVWITH(x) declares a FLOAT_SWAPPER named x (the expansion already
 * ends with a semicolon); the remaining macros convert x in place.
 */
#define FLOAT_CONVWITH(x)	FLOAT_SWAPPER x;
#define HTONF(x)	((x).u = rb_htonf((x).u))
#define HTOVF(x)	((x).u = rb_htovf((x).u))
#define NTOHF(x)	((x).u = rb_ntohf((x).u))
#define VTOHF(x)	((x).u = rb_vtohf((x).u))

/* Same as above, for DOUBLE_SWAPPER. */
#define DOUBLE_CONVWITH(x)	DOUBLE_SWAPPER x;
#define HTOND(x)	((x).u = rb_htond((x).u))
#define HTOVD(x)	((x).u = rb_htovd((x).u))
#define NTOHD(x)	((x).u = rb_ntohd((x).u))
#define VTOHD(x)	((x).u = rb_vtohd((x).u))
106 
107 #define MAX_INTEGER_PACK_SIZE 8
108 
109 static const char toofew[] = "too few arguments";
110 
111 static void encodes(VALUE,const char*,long,int,int);
112 static void qpencode(VALUE,VALUE,long);
113 
114 static unsigned long utf8_to_uv(const char*,long*);
115 
116 static ID id_associated;
117 
118 static void
str_associate(VALUE str,VALUE add)119 str_associate(VALUE str, VALUE add)
120 {
121     /* assert(NIL_P(rb_attr_get(str, id_associated))); */
122     rb_ivar_set(str, id_associated, add);
123 }
124 
125 static VALUE
str_associated(VALUE str)126 str_associated(VALUE str)
127 {
128     return rb_ivar_lookup(str, id_associated, Qfalse);
129 }
130 
131 static void
unknown_directive(const char * mode,char type,VALUE fmt)132 unknown_directive(const char *mode, char type, VALUE fmt)
133 {
134     VALUE f;
135     char unknown[5];
136 
137     if (ISPRINT(type)) {
138         unknown[0] = type;
139         unknown[1] = '\0';
140     }
141     else {
142         snprintf(unknown, sizeof(unknown), "\\x%.2x", type & 0xff);
143     }
144     f = rb_str_quote_unprintable(fmt);
145     if (f != fmt) {
146         fmt = rb_str_subseq(f, 1, RSTRING_LEN(f) - 2);
147     }
148     rb_warning("unknown %s directive '%s' in '%"PRIsVALUE"'",
149                mode, unknown, fmt);
150 }
151 
152 static float
VALUE_to_float(VALUE obj)153 VALUE_to_float(VALUE obj)
154 {
155     VALUE v = rb_to_float(obj);
156     double d = RFLOAT_VALUE(v);
157 
158     if (isnan(d)) {
159         return NAN;
160     }
161     else if (d < -FLT_MAX) {
162         return -INFINITY;
163     }
164     else if (d <= FLT_MAX) {
165         return d;
166     }
167     else {
168         return INFINITY;
169     }
170 }
171 
172 /*
173  *  call-seq:
174  *     arr.pack( aTemplateString ) -> aBinaryString
175  *     arr.pack( aTemplateString, buffer: aBufferString ) -> aBufferString
176  *
 *  Packs the contents of <i>arr</i> into a binary sequence according to
 *  the directives in <i>aTemplateString</i> (see the table below).
179  *  Directives ``A,'' ``a,'' and ``Z'' may be followed by a count,
180  *  which gives the width of the resulting field. The remaining
181  *  directives also may take a count, indicating the number of array
182  *  elements to convert. If the count is an asterisk
183  *  (``<code>*</code>''), all remaining array elements will be
184  *  converted. Any of the directives ``<code>sSiIlL</code>'' may be
185  *  followed by an underscore (``<code>_</code>'') or
186  *  exclamation mark (``<code>!</code>'') to use the underlying
187  *  platform's native size for the specified type; otherwise, they use a
188  *  platform-independent size. Spaces are ignored in the template
189  *  string. See also <code>String#unpack</code>.
190  *
191  *     a = [ "a", "b", "c" ]
192  *     n = [ 65, 66, 67 ]
193  *     a.pack("A3A3A3")   #=> "a  b  c  "
194  *     a.pack("a3a3a3")   #=> "a\000\000b\000\000c\000\000"
195  *     n.pack("ccc")      #=> "ABC"
196  *
197  *  If <i>aBufferString</i> is specified and its capacity is enough,
198  *  +pack+ uses it as the buffer and returns it.
199  *  When the offset is specified by the beginning of <i>aTemplateString</i>,
200  *  the result is filled after the offset.
 *  If <i>aBufferString</i> already has contents and they are longer than
 *  the offset, the contents after the offset are overwritten by the result.
 *  If they are shorter, the gap is filled with ``<code>\0</code>''.
204  *
205  *  Note that ``buffer:'' option does not guarantee not to allocate memory
206  *  in +pack+.  If the capacity of <i>aBufferString</i> is not enough,
207  *  +pack+ allocates memory.
208  *
209  *  Directives for +pack+.
210  *
211  *   Integer       | Array   |
212  *   Directive     | Element | Meaning
213  *   ----------------------------------------------------------------------------
214  *   C             | Integer | 8-bit unsigned (unsigned char)
215  *   S             | Integer | 16-bit unsigned, native endian (uint16_t)
216  *   L             | Integer | 32-bit unsigned, native endian (uint32_t)
217  *   Q             | Integer | 64-bit unsigned, native endian (uint64_t)
218  *   J             | Integer | pointer width unsigned, native endian (uintptr_t)
219  *                 |         | (J is available since Ruby 2.3.)
220  *                 |         |
221  *   c             | Integer | 8-bit signed (signed char)
222  *   s             | Integer | 16-bit signed, native endian (int16_t)
223  *   l             | Integer | 32-bit signed, native endian (int32_t)
224  *   q             | Integer | 64-bit signed, native endian (int64_t)
225  *   j             | Integer | pointer width signed, native endian (intptr_t)
226  *                 |         | (j is available since Ruby 2.3.)
227  *                 |         |
228  *   S_ S!         | Integer | unsigned short, native endian
229  *   I I_ I!       | Integer | unsigned int, native endian
230  *   L_ L!         | Integer | unsigned long, native endian
231  *   Q_ Q!         | Integer | unsigned long long, native endian (ArgumentError
232  *                 |         | if the platform has no long long type.)
 *                 |         | (Q_ and Q! are available since Ruby 2.1.)
234  *   J!            | Integer | uintptr_t, native endian (same with J)
235  *                 |         | (J! is available since Ruby 2.3.)
236  *                 |         |
237  *   s_ s!         | Integer | signed short, native endian
238  *   i i_ i!       | Integer | signed int, native endian
239  *   l_ l!         | Integer | signed long, native endian
240  *   q_ q!         | Integer | signed long long, native endian (ArgumentError
241  *                 |         | if the platform has no long long type.)
 *                 |         | (q_ and q! are available since Ruby 2.1.)
243  *   j!            | Integer | intptr_t, native endian (same with j)
244  *                 |         | (j! is available since Ruby 2.3.)
245  *                 |         |
246  *   S> s> S!> s!> | Integer | same as the directives without ">" except
247  *   L> l> L!> l!> |         | big endian
248  *   I!> i!>       |         | (available since Ruby 1.9.3)
249  *   Q> q> Q!> q!> |         | "S>" is same as "n"
250  *   J> j> J!> j!> |         | "L>" is same as "N"
251  *                 |         |
252  *   S< s< S!< s!< | Integer | same as the directives without "<" except
253  *   L< l< L!< l!< |         | little endian
254  *   I!< i!<       |         | (available since Ruby 1.9.3)
255  *   Q< q< Q!< q!< |         | "S<" is same as "v"
256  *   J< j< J!< j!< |         | "L<" is same as "V"
257  *                 |         |
258  *   n             | Integer | 16-bit unsigned, network (big-endian) byte order
259  *   N             | Integer | 32-bit unsigned, network (big-endian) byte order
260  *   v             | Integer | 16-bit unsigned, VAX (little-endian) byte order
261  *   V             | Integer | 32-bit unsigned, VAX (little-endian) byte order
262  *                 |         |
263  *   U             | Integer | UTF-8 character
264  *   w             | Integer | BER-compressed integer
265  *
266  *   Float        | Array   |
267  *   Directive    | Element | Meaning
268  *   ---------------------------------------------------------------------------
269  *   D d          | Float   | double-precision, native format
270  *   F f          | Float   | single-precision, native format
271  *   E            | Float   | double-precision, little-endian byte order
272  *   e            | Float   | single-precision, little-endian byte order
273  *   G            | Float   | double-precision, network (big-endian) byte order
274  *   g            | Float   | single-precision, network (big-endian) byte order
275  *
276  *   String       | Array   |
277  *   Directive    | Element | Meaning
278  *   ---------------------------------------------------------------------------
279  *   A            | String  | arbitrary binary string (space padded, count is width)
280  *   a            | String  | arbitrary binary string (null padded, count is width)
281  *   Z            | String  | same as ``a'', except that null is added with *
282  *   B            | String  | bit string (MSB first)
283  *   b            | String  | bit string (LSB first)
284  *   H            | String  | hex string (high nibble first)
285  *   h            | String  | hex string (low nibble first)
286  *   u            | String  | UU-encoded string
287  *   M            | String  | quoted printable, MIME encoding (see also RFC2045)
288  *                |         | (text mode but input must use LF and output LF)
289  *   m            | String  | base64 encoded string (see RFC 2045, count is width)
 *                |         | (if count is 0, no line feeds are added, see RFC 4648)
291  *   P            | String  | pointer to a structure (fixed-length string)
292  *   p            | String  | pointer to a null-terminated string
293  *
294  *   Misc.        | Array   |
295  *   Directive    | Element | Meaning
296  *   ---------------------------------------------------------------------------
297  *   @            | ---     | moves to absolute position
298  *   X            | ---     | back up a byte
299  *   x            | ---     | null byte
300  */
301 
302 static VALUE
pack_pack(int argc,VALUE * argv,VALUE ary)303 pack_pack(int argc, VALUE *argv, VALUE ary)
304 {
305     static const char nul10[] = "\0\0\0\0\0\0\0\0\0\0";
306     static const char spc10[] = "          ";
307     const char *p, *pend;
308     VALUE fmt, opt = Qnil, res, from, associates = 0, buffer = 0;
309     char type;
310     long len, idx, plen;
311     const char *ptr;
312     int enc_info = 1;		/* 0 - BINARY, 1 - US-ASCII, 2 - UTF-8 */
313 #ifdef NATINT_PACK
314     int natint;		/* native integer */
315 #endif
316     int integer_size, bigendian_p;
317 
318     rb_scan_args(argc, argv, "10:", &fmt, &opt);
319 
320     StringValue(fmt);
321     p = RSTRING_PTR(fmt);
322     pend = p + RSTRING_LEN(fmt);
323     if (!NIL_P(opt)) {
324 	static ID keyword_ids[1];
325 	if (!keyword_ids[0])
326 	    CONST_ID(keyword_ids[0], "buffer");
327 
328 	rb_get_kwargs(opt, keyword_ids, 0, 1, &buffer);
329 
330 	if (buffer != Qundef && !RB_TYPE_P(buffer, T_STRING))
331 	    rb_raise(rb_eTypeError, "buffer must be String, not %s", rb_obj_classname(buffer));
332     }
333     if (buffer)
334 	res = buffer;
335     else
336 	res = rb_str_buf_new(0);
337 
338     idx = 0;
339 
340 #define TOO_FEW (rb_raise(rb_eArgError, toofew), 0)
341 #define MORE_ITEM (idx < RARRAY_LEN(ary))
342 #define THISFROM (MORE_ITEM ? RARRAY_AREF(ary, idx) : TOO_FEW)
343 #define NEXTFROM (MORE_ITEM ? RARRAY_AREF(ary, idx++) : TOO_FEW)
344 
345     while (p < pend) {
346 	int explicit_endian = 0;
347 	if (RSTRING_PTR(fmt) + RSTRING_LEN(fmt) != pend) {
348 	    rb_raise(rb_eRuntimeError, "format string modified");
349 	}
350 	type = *p++;		/* get data type */
351 #ifdef NATINT_PACK
352 	natint = 0;
353 #endif
354 
355 	if (ISSPACE(type)) continue;
356 	if (type == '#') {
357 	    while ((p < pend) && (*p != '\n')) {
358 		p++;
359 	    }
360 	    continue;
361 	}
362 
363 	{
364           modifiers:
365 	    switch (*p) {
366 	      case '_':
367 	      case '!':
368 		if (strchr(natstr, type)) {
369 #ifdef NATINT_PACK
370 		    natint = 1;
371 #endif
372 		    p++;
373 		}
374 		else {
375 		    rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr);
376 		}
377 		goto modifiers;
378 
379 	      case '<':
380 	      case '>':
381 		if (!strchr(endstr, type)) {
382 		    rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr);
383 		}
384 		if (explicit_endian) {
385 		    rb_raise(rb_eRangeError, "Can't use both '<' and '>'");
386 		}
387 		explicit_endian = *p++;
388 		goto modifiers;
389 	    }
390 	}
391 
392 	if (*p == '*') {	/* set data length */
393 	    len = strchr("@Xxu", type) ? 0
394                 : strchr("PMm", type) ? 1
395                 : RARRAY_LEN(ary) - idx;
396 	    p++;
397 	}
398 	else if (ISDIGIT(*p)) {
399 	    errno = 0;
400 	    len = STRTOUL(p, (char**)&p, 10);
401 	    if (errno) {
402 		rb_raise(rb_eRangeError, "pack length too big");
403 	    }
404 	}
405 	else {
406 	    len = 1;
407 	}
408 
409 	switch (type) {
410 	  case 'U':
411 	    /* if encoding is US-ASCII, upgrade to UTF-8 */
412 	    if (enc_info == 1) enc_info = 2;
413 	    break;
414 	  case 'm': case 'M': case 'u':
415 	    /* keep US-ASCII (do nothing) */
416 	    break;
417 	  default:
418 	    /* fall back to BINARY */
419 	    enc_info = 0;
420 	    break;
421 	}
422 	switch (type) {
423 	  case 'A': case 'a': case 'Z':
424 	  case 'B': case 'b':
425 	  case 'H': case 'h':
426 	    from = NEXTFROM;
427 	    if (NIL_P(from)) {
428 		ptr = "";
429 		plen = 0;
430 	    }
431 	    else {
432 		StringValue(from);
433 		ptr = RSTRING_PTR(from);
434 		plen = RSTRING_LEN(from);
435 		OBJ_INFECT(res, from);
436 	    }
437 
438 	    if (p[-1] == '*')
439 		len = plen;
440 
441 	    switch (type) {
442 	      case 'a':		/* arbitrary binary string (null padded)  */
443 	      case 'A':         /* arbitrary binary string (ASCII space padded) */
444 	      case 'Z':         /* null terminated string  */
445 		if (plen >= len) {
446 		    rb_str_buf_cat(res, ptr, len);
447 		    if (p[-1] == '*' && type == 'Z')
448 			rb_str_buf_cat(res, nul10, 1);
449 		}
450 		else {
451 		    rb_str_buf_cat(res, ptr, plen);
452 		    len -= plen;
453 		    while (len >= 10) {
454 			rb_str_buf_cat(res, (type == 'A')?spc10:nul10, 10);
455 			len -= 10;
456 		    }
457 		    rb_str_buf_cat(res, (type == 'A')?spc10:nul10, len);
458 		}
459 		break;
460 
461 #define castchar(from) (char)((from) & 0xff)
462 
463 	      case 'b':		/* bit string (ascending) */
464 		{
465 		    int byte = 0;
466 		    long i, j = 0;
467 
468 		    if (len > plen) {
469 			j = (len - plen + 1)/2;
470 			len = plen;
471 		    }
472 		    for (i=0; i++ < len; ptr++) {
473 			if (*ptr & 1)
474 			    byte |= 128;
475 			if (i & 7)
476 			    byte >>= 1;
477 			else {
478 			    char c = castchar(byte);
479 			    rb_str_buf_cat(res, &c, 1);
480 			    byte = 0;
481 			}
482 		    }
483 		    if (len & 7) {
484 			char c;
485 			byte >>= 7 - (len & 7);
486 			c = castchar(byte);
487 			rb_str_buf_cat(res, &c, 1);
488 		    }
489 		    len = j;
490 		    goto grow;
491 		}
492 		break;
493 
494 	      case 'B':		/* bit string (descending) */
495 		{
496 		    int byte = 0;
497 		    long i, j = 0;
498 
499 		    if (len > plen) {
500 			j = (len - plen + 1)/2;
501 			len = plen;
502 		    }
503 		    for (i=0; i++ < len; ptr++) {
504 			byte |= *ptr & 1;
505 			if (i & 7)
506 			    byte <<= 1;
507 			else {
508 			    char c = castchar(byte);
509 			    rb_str_buf_cat(res, &c, 1);
510 			    byte = 0;
511 			}
512 		    }
513 		    if (len & 7) {
514 			char c;
515 			byte <<= 7 - (len & 7);
516 			c = castchar(byte);
517 			rb_str_buf_cat(res, &c, 1);
518 		    }
519 		    len = j;
520 		    goto grow;
521 		}
522 		break;
523 
524 	      case 'h':		/* hex string (low nibble first) */
525 		{
526 		    int byte = 0;
527 		    long i, j = 0;
528 
529 		    if (len > plen) {
530 			j = (len + 1) / 2 - (plen + 1) / 2;
531 			len = plen;
532 		    }
533 		    for (i=0; i++ < len; ptr++) {
534 			if (ISALPHA(*ptr))
535 			    byte |= (((*ptr & 15) + 9) & 15) << 4;
536 			else
537 			    byte |= (*ptr & 15) << 4;
538 			if (i & 1)
539 			    byte >>= 4;
540 			else {
541 			    char c = castchar(byte);
542 			    rb_str_buf_cat(res, &c, 1);
543 			    byte = 0;
544 			}
545 		    }
546 		    if (len & 1) {
547 			char c = castchar(byte);
548 			rb_str_buf_cat(res, &c, 1);
549 		    }
550 		    len = j;
551 		    goto grow;
552 		}
553 		break;
554 
555 	      case 'H':		/* hex string (high nibble first) */
556 		{
557 		    int byte = 0;
558 		    long i, j = 0;
559 
560 		    if (len > plen) {
561 			j = (len + 1) / 2 - (plen + 1) / 2;
562 			len = plen;
563 		    }
564 		    for (i=0; i++ < len; ptr++) {
565 			if (ISALPHA(*ptr))
566 			    byte |= ((*ptr & 15) + 9) & 15;
567 			else
568 			    byte |= *ptr & 15;
569 			if (i & 1)
570 			    byte <<= 4;
571 			else {
572 			    char c = castchar(byte);
573 			    rb_str_buf_cat(res, &c, 1);
574 			    byte = 0;
575 			}
576 		    }
577 		    if (len & 1) {
578 			char c = castchar(byte);
579 			rb_str_buf_cat(res, &c, 1);
580 		    }
581 		    len = j;
582 		    goto grow;
583 		}
584 		break;
585 	    }
586 	    break;
587 
588 	  case 'c':		/* signed char */
589 	  case 'C':		/* unsigned char */
590             integer_size = 1;
591             bigendian_p = BIGENDIAN_P(); /* not effective */
592             goto pack_integer;
593 
594 	  case 's':		/* s for int16_t, s! for signed short */
595             integer_size = NATINT_LEN(short, 2);
596             bigendian_p = BIGENDIAN_P();
597             goto pack_integer;
598 
599 	  case 'S':		/* S for uint16_t, S! for unsigned short */
600             integer_size = NATINT_LEN(short, 2);
601             bigendian_p = BIGENDIAN_P();
602             goto pack_integer;
603 
604 	  case 'i':		/* i and i! for signed int */
605             integer_size = (int)sizeof(int);
606             bigendian_p = BIGENDIAN_P();
607             goto pack_integer;
608 
609 	  case 'I':		/* I and I! for unsigned int */
610             integer_size = (int)sizeof(int);
611             bigendian_p = BIGENDIAN_P();
612             goto pack_integer;
613 
614 	  case 'l':		/* l for int32_t, l! for signed long */
615             integer_size = NATINT_LEN(long, 4);
616             bigendian_p = BIGENDIAN_P();
617             goto pack_integer;
618 
619 	  case 'L':		/* L for uint32_t, L! for unsigned long */
620             integer_size = NATINT_LEN(long, 4);
621             bigendian_p = BIGENDIAN_P();
622             goto pack_integer;
623 
624 	  case 'q':		/* q for int64_t, q! for signed long long */
625 	    integer_size = NATINT_LEN_Q;
626             bigendian_p = BIGENDIAN_P();
627             goto pack_integer;
628 
629 	  case 'Q':		/* Q for uint64_t, Q! for unsigned long long */
630 	    integer_size = NATINT_LEN_Q;
631             bigendian_p = BIGENDIAN_P();
632             goto pack_integer;
633 
634 	  case 'j':		/* j for intptr_t */
635 	    integer_size = sizeof(intptr_t);
636 	    bigendian_p = BIGENDIAN_P();
637 	    goto pack_integer;
638 
639 	  case 'J':		/* J for uintptr_t */
640 	    integer_size = sizeof(uintptr_t);
641 	    bigendian_p = BIGENDIAN_P();
642 	    goto pack_integer;
643 
644 	  case 'n':		/* 16 bit (2 bytes) integer (network byte-order)  */
645             integer_size = 2;
646             bigendian_p = 1;
647             goto pack_integer;
648 
649 	  case 'N':		/* 32 bit (4 bytes) integer (network byte-order) */
650             integer_size = 4;
651             bigendian_p = 1;
652             goto pack_integer;
653 
654 	  case 'v':		/* 16 bit (2 bytes) integer (VAX byte-order) */
655             integer_size = 2;
656             bigendian_p = 0;
657             goto pack_integer;
658 
659 	  case 'V':		/* 32 bit (4 bytes) integer (VAX byte-order) */
660             integer_size = 4;
661             bigendian_p = 0;
662             goto pack_integer;
663 
664           pack_integer:
665 	    if (explicit_endian) {
666 		bigendian_p = explicit_endian == '>';
667 	    }
668             if (integer_size > MAX_INTEGER_PACK_SIZE)
669                 rb_bug("unexpected intger size for pack: %d", integer_size);
670             while (len-- > 0) {
671                 char intbuf[MAX_INTEGER_PACK_SIZE];
672 
673                 from = NEXTFROM;
674                 rb_integer_pack(from, intbuf, integer_size, 1, 0,
675                     INTEGER_PACK_2COMP |
676                     (bigendian_p ? INTEGER_PACK_BIG_ENDIAN : INTEGER_PACK_LITTLE_ENDIAN));
677                 rb_str_buf_cat(res, intbuf, integer_size);
678             }
679 	    break;
680 
681 	  case 'f':		/* single precision float in native format */
682 	  case 'F':		/* ditto */
683 	    while (len-- > 0) {
684 		float f;
685 
686 		from = NEXTFROM;
687                 f = VALUE_to_float(from);
688 		rb_str_buf_cat(res, (char*)&f, sizeof(float));
689 	    }
690 	    break;
691 
692 	  case 'e':		/* single precision float in VAX byte-order */
693 	    while (len-- > 0) {
694 		FLOAT_CONVWITH(tmp);
695 
696 		from = NEXTFROM;
697                 tmp.f = VALUE_to_float(from);
698 		HTOVF(tmp);
699 		rb_str_buf_cat(res, tmp.buf, sizeof(float));
700 	    }
701 	    break;
702 
703 	  case 'E':		/* double precision float in VAX byte-order */
704 	    while (len-- > 0) {
705 		DOUBLE_CONVWITH(tmp);
706 		from = NEXTFROM;
707 		tmp.d = RFLOAT_VALUE(rb_to_float(from));
708 		HTOVD(tmp);
709 		rb_str_buf_cat(res, tmp.buf, sizeof(double));
710 	    }
711 	    break;
712 
713 	  case 'd':		/* double precision float in native format */
714 	  case 'D':		/* ditto */
715 	    while (len-- > 0) {
716 		double d;
717 
718 		from = NEXTFROM;
719 		d = RFLOAT_VALUE(rb_to_float(from));
720 		rb_str_buf_cat(res, (char*)&d, sizeof(double));
721 	    }
722 	    break;
723 
724 	  case 'g':		/* single precision float in network byte-order */
725 	    while (len-- > 0) {
726 		FLOAT_CONVWITH(tmp);
727 		from = NEXTFROM;
728                 tmp.f = VALUE_to_float(from);
729 		HTONF(tmp);
730 		rb_str_buf_cat(res, tmp.buf, sizeof(float));
731 	    }
732 	    break;
733 
734 	  case 'G':		/* double precision float in network byte-order */
735 	    while (len-- > 0) {
736 		DOUBLE_CONVWITH(tmp);
737 
738 		from = NEXTFROM;
739 		tmp.d = RFLOAT_VALUE(rb_to_float(from));
740 		HTOND(tmp);
741 		rb_str_buf_cat(res, tmp.buf, sizeof(double));
742 	    }
743 	    break;
744 
745 	  case 'x':		/* null byte */
746 	  grow:
747 	    while (len >= 10) {
748 		rb_str_buf_cat(res, nul10, 10);
749 		len -= 10;
750 	    }
751 	    rb_str_buf_cat(res, nul10, len);
752 	    break;
753 
754 	  case 'X':		/* back up byte */
755 	  shrink:
756 	    plen = RSTRING_LEN(res);
757 	    if (plen < len)
758 		rb_raise(rb_eArgError, "X outside of string");
759 	    rb_str_set_len(res, plen - len);
760 	    break;
761 
762 	  case '@':		/* null fill to absolute position */
763 	    len -= RSTRING_LEN(res);
764 	    if (len > 0) goto grow;
765 	    len = -len;
766 	    if (len > 0) goto shrink;
767 	    break;
768 
769 	  case '%':
770 	    rb_raise(rb_eArgError, "%% is not supported");
771 	    break;
772 
773 	  case 'U':		/* Unicode character */
774 	    while (len-- > 0) {
775 		SIGNED_VALUE l;
776 		char buf[8];
777 		int le;
778 
779 		from = NEXTFROM;
780 		from = rb_to_int(from);
781 		l = NUM2LONG(from);
782 		if (l < 0) {
783 		    rb_raise(rb_eRangeError, "pack(U): value out of range");
784 		}
785 		le = rb_uv_to_utf8(buf, l);
786 		rb_str_buf_cat(res, (char*)buf, le);
787 	    }
788 	    break;
789 
790 	  case 'u':		/* uuencoded string */
791 	  case 'm':		/* base64 encoded string */
792 	    from = NEXTFROM;
793 	    StringValue(from);
794 	    ptr = RSTRING_PTR(from);
795 	    plen = RSTRING_LEN(from);
796             OBJ_INFECT(res, from);
797 
798 	    if (len == 0 && type == 'm') {
799 		encodes(res, ptr, plen, type, 0);
800 		ptr += plen;
801 		break;
802 	    }
803 	    if (len <= 2)
804 		len = 45;
805 	    else if (len > 63 && type == 'u')
806 		len = 63;
807 	    else
808 		len = len / 3 * 3;
809 	    while (plen > 0) {
810 		long todo;
811 
812 		if (plen > len)
813 		    todo = len;
814 		else
815 		    todo = plen;
816 		encodes(res, ptr, todo, type, 1);
817 		plen -= todo;
818 		ptr += todo;
819 	    }
820 	    break;
821 
822 	  case 'M':		/* quoted-printable encoded string */
823 	    from = rb_obj_as_string(NEXTFROM);
824             OBJ_INFECT(res, from);
825 	    if (len <= 1)
826 		len = 72;
827 	    qpencode(res, from, len);
828 	    break;
829 
830 	  case 'P':		/* pointer to packed byte string */
831 	    from = THISFROM;
832 	    if (!NIL_P(from)) {
833 		StringValue(from);
834 		if (RSTRING_LEN(from) < len) {
835 		    rb_raise(rb_eArgError, "too short buffer for P(%ld for %ld)",
836 			     RSTRING_LEN(from), len);
837 		}
838 	    }
839 	    len = 1;
840 	    /* FALL THROUGH */
841 	  case 'p':		/* pointer to string */
842 	    while (len-- > 0) {
843 		char *t;
844 		from = NEXTFROM;
845 		if (NIL_P(from)) {
846 		    t = 0;
847 		}
848 		else {
849 		    t = StringValuePtr(from);
850                     OBJ_INFECT(res, from);
851 		    rb_obj_taint(from);
852 		}
853 		if (!associates) {
854 		    associates = rb_ary_new();
855 		}
856 		rb_ary_push(associates, from);
857 		rb_str_buf_cat(res, (char*)&t, sizeof(char*));
858 	    }
859 	    break;
860 
861 	  case 'w':		/* BER compressed integer  */
862 	    while (len-- > 0) {
863 		VALUE buf = rb_str_new(0, 0);
864                 size_t numbytes;
865                 int sign;
866                 char *cp;
867 
868 		from = NEXTFROM;
869                 from = rb_to_int(from);
870                 numbytes = rb_absint_numwords(from, 7, NULL);
871                 if (numbytes == 0)
872                     numbytes = 1;
873                 buf = rb_str_new(NULL, numbytes);
874 
875                 sign = rb_integer_pack(from, RSTRING_PTR(buf), RSTRING_LEN(buf), 1, 1, INTEGER_PACK_BIG_ENDIAN);
876 
877                 if (sign < 0)
878                     rb_raise(rb_eArgError, "can't compress negative numbers");
879                 if (sign == 2)
880                     rb_bug("buffer size problem?");
881 
882                 cp = RSTRING_PTR(buf);
883                 while (1 < numbytes) {
884                     *cp |= 0x80;
885                     cp++;
886                     numbytes--;
887                 }
888 
889                 rb_str_buf_cat(res, RSTRING_PTR(buf), RSTRING_LEN(buf));
890 	    }
891 	    break;
892 
893 	  default: {
894             unknown_directive("pack", type, fmt);
895 	    break;
896 	  }
897 	}
898     }
899 
900     if (associates) {
901 	str_associate(res, associates);
902     }
903     OBJ_INFECT(res, fmt);
904     switch (enc_info) {
905       case 1:
906 	ENCODING_CODERANGE_SET(res, rb_usascii_encindex(), ENC_CODERANGE_7BIT);
907 	break;
908       case 2:
909 	rb_enc_set_index(res, rb_utf8_encindex());
910 	break;
911       default:
912 	/* do nothing, keep ASCII-8BIT */
913 	break;
914     }
915     return res;
916 }
917 
918 static const char uu_table[] =
919 "`!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
920 static const char b64_table[] =
921 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
922 
923 static void
encodes(VALUE str,const char * s0,long len,int type,int tail_lf)924 encodes(VALUE str, const char *s0, long len, int type, int tail_lf)
925 {
926     enum {buff_size = 4096, encoded_unit = 4, input_unit = 3};
927     char buff[buff_size + 1];	/* +1 for tail_lf */
928     long i = 0;
929     const char *const trans = type == 'u' ? uu_table : b64_table;
930     char padding;
931     const unsigned char *s = (const unsigned char *)s0;
932 
933     if (type == 'u') {
934 	buff[i++] = (char)len + ' ';
935 	padding = '`';
936     }
937     else {
938 	padding = '=';
939     }
940     while (len >= input_unit) {
941         while (len >= input_unit && buff_size-i >= encoded_unit) {
942             buff[i++] = trans[077 & (*s >> 2)];
943             buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))];
944             buff[i++] = trans[077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03))];
945             buff[i++] = trans[077 & s[2]];
946             s += input_unit;
947             len -= input_unit;
948         }
949         if (buff_size-i < encoded_unit) {
950             rb_str_buf_cat(str, buff, i);
951             i = 0;
952         }
953     }
954 
955     if (len == 2) {
956 	buff[i++] = trans[077 & (*s >> 2)];
957 	buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))];
958 	buff[i++] = trans[077 & (((s[1] << 2) & 074) | (('\0' >> 6) & 03))];
959 	buff[i++] = padding;
960     }
961     else if (len == 1) {
962 	buff[i++] = trans[077 & (*s >> 2)];
963 	buff[i++] = trans[077 & (((*s << 4) & 060) | (('\0' >> 4) & 017))];
964 	buff[i++] = padding;
965 	buff[i++] = padding;
966     }
967     if (tail_lf) buff[i++] = '\n';
968     rb_str_buf_cat(str, buff, i);
969     if ((size_t)i > sizeof(buff)) rb_bug("encodes() buffer overrun");
970 }
971 
972 static const char hex_table[] = "0123456789ABCDEF";
973 
974 static void
qpencode(VALUE str,VALUE from,long len)975 qpencode(VALUE str, VALUE from, long len)
976 {
977     char buff[1024];
978     long i = 0, n = 0, prev = EOF;
979     unsigned char *s = (unsigned char*)RSTRING_PTR(from);
980     unsigned char *send = s + RSTRING_LEN(from);
981 
982     while (s < send) {
983         if ((*s > 126) ||
984 	    (*s < 32 && *s != '\n' && *s != '\t') ||
985 	    (*s == '=')) {
986 	    buff[i++] = '=';
987 	    buff[i++] = hex_table[*s >> 4];
988 	    buff[i++] = hex_table[*s & 0x0f];
989             n += 3;
990             prev = EOF;
991         }
992 	else if (*s == '\n') {
993             if (prev == ' ' || prev == '\t') {
994 		buff[i++] = '=';
995 		buff[i++] = *s;
996             }
997 	    buff[i++] = *s;
998             n = 0;
999             prev = *s;
1000         }
1001 	else {
1002 	    buff[i++] = *s;
1003             n++;
1004             prev = *s;
1005         }
1006         if (n > len) {
1007 	    buff[i++] = '=';
1008 	    buff[i++] = '\n';
1009             n = 0;
1010             prev = '\n';
1011         }
1012 	if (i > 1024 - 5) {
1013 	    rb_str_buf_cat(str, buff, i);
1014 	    i = 0;
1015 	}
1016 	s++;
1017     }
1018     if (n > 0) {
1019 	buff[i++] = '=';
1020 	buff[i++] = '\n';
1021     }
1022     if (i > 0) {
1023 	rb_str_buf_cat(str, buff, i);
1024     }
1025 }
1026 
1027 static inline int
hex2num(char c)1028 hex2num(char c)
1029 {
1030     int n;
1031     n = ruby_digit36_to_number_table[(unsigned char)c];
1032     if (16 <= n)
1033         n = -1;
1034     return n;
1035 }
1036 
/*
 * Clamps the repeat count `len` to the number of whole items of size `sz`
 * that remain between `s` and `send`.  When the count had to be reduced and
 * no '*' was given (`star` is zero), the number of missing items is stored
 * in `tmp_len`; otherwise `tmp_len` is left at 0.
 *
 * Uses the caller's locals len, tmp_len, star, s and send.
 */
#define PACK_LENGTH_ADJUST_SIZE(sz) do {	\
    tmp_len = 0;				\
    if (len > (long)((send-s)/(sz))) {		\
        if (!star) {				\
	    tmp_len = len-(send-s)/(sz);	\
        }					\
	len = (send-s)/(sz);			\
    }						\
} while (0)
1046 
/*
 * In UNPACK_ARRAY mode, when `tmp_len` is positive, stores nil at index
 * RARRAY_LEN(ary)+tmp_len-1, i.e. tmp_len slots past the current last
 * element (NOTE(review): rb_ary_store presumably fills the gap with nil --
 * confirm).  Does nothing in the other modes.
 *
 * Uses the caller's locals tmp_len, mode and ary.
 */
#define PACK_ITEM_ADJUST() do { \
    if (tmp_len > 0 && mode == UNPACK_ARRAY) \
	rb_ary_store(ary, RARRAY_LEN(ary)+tmp_len-1, Qnil); \
} while (0)
1051 
/* Workaround for Oracle Developer Studio (Oracle Solaris Studio)
 * 12.4/12.5/12.6 C compiler optimization bug
 * with "-xO4" optimization option.
 *
 * AVOID_CC_BUG expands to `volatile` when __SUNPRO_C is in the range
 * 0x5130..0x5150 and to nothing for every other compiler.  It is used as a
 * qualifier on the tmp_len local of pack_unpack_internal.
 */
#if defined(__SUNPRO_C) && 0x5130 <= __SUNPRO_C && __SUNPRO_C <= 0x5150
# define AVOID_CC_BUG volatile
#else
# define AVOID_CC_BUG
#endif
1061 
1062 static VALUE
infected_str_new(const char * ptr,long len,VALUE str)1063 infected_str_new(const char *ptr, long len, VALUE str)
1064 {
1065     VALUE s = rb_str_new(ptr, len);
1066 
1067     OBJ_INFECT(s, str);
1068     return s;
1069 }
1070 
/* unpack mode: selects what pack_unpack_internal does with each decoded item */
#define UNPACK_ARRAY 0	/* push the item onto the result array */
#define UNPACK_BLOCK 1	/* yield the item to the block */
#define UNPACK_1 2	/* return the first item immediately */
1075 
/*
 * Decodes the bytes of +str+ according to the directive string +fmt+.
 * Shared implementation behind pack_unpack and pack_unpack1.
 *
 * +mode+ selects what is done with every decoded item (see UNPACK_PUSH):
 *   UNPACK_ARRAY - items are appended to a new array, which is returned;
 *   UNPACK_BLOCK - items are yielded; the function returns nil;
 *   UNPACK_1     - the first decoded item is returned right away; nil is
 *                  returned when the format produces no item at all.
 *
 * The format is scanned left to right.  For each directive the function
 * reads the type character, optional '_'/'!' and '<'/'>' modifiers, and an
 * optional count (decimal number or '*'), then dispatches on the type.
 * `s` is the read cursor into +str+ and is advanced by each directive.
 *
 * Raises ArgumentError for misplaced modifiers, for the unsupported '%'
 * directive, for out-of-range '@'/'X'/'x' moves, for invalid strict base64
 * and for 'p'/'P' pointers that are not associated with +str+; raises
 * RangeError for an over-large count or for combining '<' and '>'.
 * Unknown directives are passed to unknown_directive().
 */
static VALUE
pack_unpack_internal(VALUE str, VALUE fmt, int mode)
{
#define hexdigits ruby_hexdigits
    char *s, *send;		/* read cursor into str, and its end */
    char *p, *pend;		/* read cursor into fmt, and its end */
    VALUE ary;			/* result array (Qnil unless UNPACK_ARRAY) */
    char type;			/* current directive character */
    long len;			/* repeat count / length of current directive */
    AVOID_CC_BUG long tmp_len;	/* items missing from the input, see PACK_LENGTH_ADJUST_SIZE */
    int star;			/* non-zero when the count was given as '*' */
#ifdef NATINT_PACK
    int natint;			/* native integer */
#endif
    int signed_p, integer_size, bigendian_p;
/* Delivers one decoded item according to `mode`: yield it, push it onto
 * `ary`, or (UNPACK_1) return it from pack_unpack_internal itself. */
#define UNPACK_PUSH(item) do {\
	VALUE item_val = (item);\
	if ((mode) == UNPACK_BLOCK) {\
	    rb_yield(item_val);\
	}\
	else if ((mode) == UNPACK_ARRAY) {\
	    rb_ary_push(ary, item_val);\
	}\
	else /* if ((mode) == UNPACK_1) { */ {\
	    return item_val; \
	}\
    } while (0)

    StringValue(str);
    StringValue(fmt);
    s = RSTRING_PTR(str);
    send = s + RSTRING_LEN(str);
    p = RSTRING_PTR(fmt);
    pend = p + RSTRING_LEN(fmt);

    ary = mode == UNPACK_ARRAY ? rb_ary_new() : Qnil;
    while (p < pend) {
	int explicit_endian = 0;	/* '<' or '>' once seen, else 0 */
	type = *p++;
#ifdef NATINT_PACK
	natint = 0;
#endif

	/* whitespace is ignored; '#' starts a comment up to end of line */
	if (ISSPACE(type)) continue;
	if (type == '#') {
	    while ((p < pend) && (*p != '\n')) {
		p++;
	    }
	    continue;
	}

	star = 0;
	{
	    /* consume any number of size ('_', '!') and endian ('<', '>')
	     * modifiers following the directive */
          modifiers:
	    switch (*p) {
	      case '_':
	      case '!':

		if (strchr(natstr, type)) {
#ifdef NATINT_PACK
		    natint = 1;
#endif
		    p++;
		}
		else {
		    rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr);
		}
		goto modifiers;

	      case '<':
	      case '>':
		if (!strchr(endstr, type)) {
		    rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr);
		}
		if (explicit_endian) {
		    rb_raise(rb_eRangeError, "Can't use both '<' and '>'");
		}
		explicit_endian = *p++;
		goto modifiers;
	    }
	}

	/* determine the count: default 1 (0 for '@'), '*' = rest of the
	 * input, or an explicit decimal number */
	if (p >= pend)
	    len = 1;
	else if (*p == '*') {
	    star = 1;
	    len = send - s;
	    p++;
	}
	else if (ISDIGIT(*p)) {
	    errno = 0;
	    len = STRTOUL(p, (char**)&p, 10);
	    /* a result that is negative once stored in a long, or a
	     * conversion error, is rejected */
	    if (len < 0 || errno) {
		rb_raise(rb_eRangeError, "pack length too big");
	    }
	}
	else {
	    len = (type != '@');
	}

	switch (type) {
	  case '%':
	    rb_raise(rb_eArgError, "%% is not supported");
	    break;

	  case 'A':
	    /* string with trailing spaces and NULs removed; the cursor
	     * still advances over the untrimmed length */
	    if (len > send - s) len = send - s;
	    {
		long end = len;
		char *t = s + len - 1;

		while (t >= s) {
		    if (*t != ' ' && *t != '\0') break;
		    t--; len--;
		}
		UNPACK_PUSH(infected_str_new(s, len, str));
		s += end;
	    }
	    break;

	  case 'Z':
	    /* string up to the first NUL within len bytes; with '*' the
	     * cursor stops just after that NUL, otherwise it skips len bytes */
	    {
		char *t = s;

		if (len > send-s) len = send-s;
		while (t < s+len && *t) t++;
		UNPACK_PUSH(infected_str_new(s, t-s, str));
		if (t < send) t++;
		s = star ? t : s+len;
	    }
	    break;

	  case 'a':
	    /* raw string of (at most) len bytes */
	    if (len > send - s) len = send - s;
	    UNPACK_PUSH(infected_str_new(s, len, str));
	    s += len;
	    break;

	  case 'b':
	    /* bit string, least significant bit of each byte first;
	     * len counts bits and is capped at the bits available */
	    {
		VALUE bitstr;
		char *t;
		int bits;
		long i;

		if (p[-1] == '*' || len > (send - s) * 8)
		    len = (send - s) * 8;
		bits = 0;
		bitstr = rb_usascii_str_new(0, len);
                OBJ_INFECT(bitstr, str);
		t = RSTRING_PTR(bitstr);
		for (i=0; i<len; i++) {
		    /* fetch a new byte every 8 bits, otherwise shift */
		    if (i & 7) bits >>= 1;
		    else bits = (unsigned char)*s++;
		    *t++ = (bits & 1) ? '1' : '0';
		}
		UNPACK_PUSH(bitstr);
	    }
	    break;

	  case 'B':
	    /* bit string, most significant bit of each byte first */
	    {
		VALUE bitstr;
		char *t;
		int bits;
		long i;

		if (p[-1] == '*' || len > (send - s) * 8)
		    len = (send - s) * 8;
		bits = 0;
		bitstr = rb_usascii_str_new(0, len);
                OBJ_INFECT(bitstr, str);
		t = RSTRING_PTR(bitstr);
		for (i=0; i<len; i++) {
		    if (i & 7) bits <<= 1;
		    else bits = (unsigned char)*s++;
		    *t++ = (bits & 128) ? '1' : '0';
		}
		UNPACK_PUSH(bitstr);
	    }
	    break;

	  case 'h':
	    /* hex string, low nibble of each byte first; len counts
	     * nibbles and is capped at the nibbles available */
	    {
		VALUE bitstr;
		char *t;
		int bits;
		long i;

		if (p[-1] == '*' || len > (send - s) * 2)
		    len = (send - s) * 2;
		bits = 0;
		bitstr = rb_usascii_str_new(0, len);
                OBJ_INFECT(bitstr, str);
		t = RSTRING_PTR(bitstr);
		for (i=0; i<len; i++) {
		    if (i & 1)
			bits >>= 4;
		    else
			bits = (unsigned char)*s++;
		    *t++ = hexdigits[bits & 15];
		}
		UNPACK_PUSH(bitstr);
	    }
	    break;

	  case 'H':
	    /* hex string, high nibble of each byte first */
	    {
		VALUE bitstr;
		char *t;
		int bits;
		long i;

		if (p[-1] == '*' || len > (send - s) * 2)
		    len = (send - s) * 2;
		bits = 0;
		bitstr = rb_usascii_str_new(0, len);
                OBJ_INFECT(bitstr, str);
		t = RSTRING_PTR(bitstr);
		for (i=0; i<len; i++) {
		    if (i & 1)
			bits <<= 4;
		    else
			bits = (unsigned char)*s++;
		    *t++ = hexdigits[(bits >> 4) & 15];
		}
		UNPACK_PUSH(bitstr);
	    }
	    break;

	  /* Integer directives: each case only records signedness, width
	   * in bytes and default byte order, then jumps to the shared
	   * decoder at unpack_integer. */
	  case 'c':
	    signed_p = 1;
	    integer_size = 1;
	    bigendian_p = BIGENDIAN_P(); /* not effective */
	    goto unpack_integer;

	  case 'C':
	    signed_p = 0;
	    integer_size = 1;
	    bigendian_p = BIGENDIAN_P(); /* not effective */
	    goto unpack_integer;

	  case 's':
	    signed_p = 1;
	    integer_size = NATINT_LEN(short, 2);
	    bigendian_p = BIGENDIAN_P();
	    goto unpack_integer;

	  case 'S':
	    signed_p = 0;
	    integer_size = NATINT_LEN(short, 2);
	    bigendian_p = BIGENDIAN_P();
	    goto unpack_integer;

	  case 'i':
	    signed_p = 1;
	    integer_size = (int)sizeof(int);
	    bigendian_p = BIGENDIAN_P();
	    goto unpack_integer;

	  case 'I':
	    signed_p = 0;
	    integer_size = (int)sizeof(int);
	    bigendian_p = BIGENDIAN_P();
	    goto unpack_integer;

	  case 'l':
	    signed_p = 1;
	    integer_size = NATINT_LEN(long, 4);
	    bigendian_p = BIGENDIAN_P();
	    goto unpack_integer;

	  case 'L':
	    signed_p = 0;
	    integer_size = NATINT_LEN(long, 4);
	    bigendian_p = BIGENDIAN_P();
	    goto unpack_integer;

	  case 'q':
	    signed_p = 1;
	    integer_size = NATINT_LEN_Q;
	    bigendian_p = BIGENDIAN_P();
	    goto unpack_integer;

	  case 'Q':
	    signed_p = 0;
	    integer_size = NATINT_LEN_Q;
	    bigendian_p = BIGENDIAN_P();
	    goto unpack_integer;

	  case 'j':
	    signed_p = 1;
	    integer_size = sizeof(intptr_t);
	    bigendian_p = BIGENDIAN_P();
	    goto unpack_integer;

	  case 'J':
	    signed_p = 0;
	    integer_size = sizeof(uintptr_t);
	    bigendian_p = BIGENDIAN_P();
	    goto unpack_integer;

	  case 'n':
	    signed_p = 0;
	    integer_size = 2;
	    bigendian_p = 1;
	    goto unpack_integer;

	  case 'N':
	    signed_p = 0;
	    integer_size = 4;
	    bigendian_p = 1;
	    goto unpack_integer;

	  case 'v':
	    signed_p = 0;
	    integer_size = 2;
	    bigendian_p = 0;
	    goto unpack_integer;

	  case 'V':
	    signed_p = 0;
	    integer_size = 4;
	    bigendian_p = 0;
	    goto unpack_integer;

	  unpack_integer:
	    /* an explicit '<' or '>' overrides the directive's byte order */
	    if (explicit_endian) {
		bigendian_p = explicit_endian == '>';
	    }
            PACK_LENGTH_ADJUST_SIZE(integer_size);
            while (len-- > 0) {
                int flags = bigendian_p ? INTEGER_PACK_BIG_ENDIAN : INTEGER_PACK_LITTLE_ENDIAN;
                VALUE val;
                if (signed_p)
                    flags |= INTEGER_PACK_2COMP;
                val = rb_integer_unpack(s, integer_size, 1, 0, flags);
                UNPACK_PUSH(val);
                s += integer_size;
            }
            PACK_ITEM_ADJUST();
            break;

	  /* Floating point directives: the bytes are copied into a local
	   * of the right type with memcpy before being converted. */
	  case 'f':
	  case 'F':
	    PACK_LENGTH_ADJUST_SIZE(sizeof(float));
	    while (len-- > 0) {
		float tmp;
		memcpy(&tmp, s, sizeof(float));
		s += sizeof(float);
		UNPACK_PUSH(DBL2NUM((double)tmp));
	    }
	    PACK_ITEM_ADJUST();
	    break;

	  case 'e':
	    /* NOTE(review): VTOHF is defined outside this chunk; presumably
	     * little-endian ("VAX") to host order -- confirm */
	    PACK_LENGTH_ADJUST_SIZE(sizeof(float));
	    while (len-- > 0) {
		FLOAT_CONVWITH(tmp);
		memcpy(tmp.buf, s, sizeof(float));
		s += sizeof(float);
		VTOHF(tmp);
		UNPACK_PUSH(DBL2NUM(tmp.f));
	    }
	    PACK_ITEM_ADJUST();
	    break;

	  case 'E':
	    PACK_LENGTH_ADJUST_SIZE(sizeof(double));
	    while (len-- > 0) {
		DOUBLE_CONVWITH(tmp);
		memcpy(tmp.buf, s, sizeof(double));
		s += sizeof(double);
		VTOHD(tmp);
		UNPACK_PUSH(DBL2NUM(tmp.d));
	    }
	    PACK_ITEM_ADJUST();
	    break;

	  case 'D':
	  case 'd':
	    PACK_LENGTH_ADJUST_SIZE(sizeof(double));
	    while (len-- > 0) {
		double tmp;
		memcpy(&tmp, s, sizeof(double));
		s += sizeof(double);
		UNPACK_PUSH(DBL2NUM(tmp));
	    }
	    PACK_ITEM_ADJUST();
	    break;

	  case 'g':
	    /* NOTE(review): NTOHF is defined outside this chunk; presumably
	     * network (big-endian) to host order -- confirm */
	    PACK_LENGTH_ADJUST_SIZE(sizeof(float));
	    while (len-- > 0) {
		FLOAT_CONVWITH(tmp);
		memcpy(tmp.buf, s, sizeof(float));
		s += sizeof(float);
		NTOHF(tmp);
		UNPACK_PUSH(DBL2NUM(tmp.f));
	    }
	    PACK_ITEM_ADJUST();
	    break;

	  case 'G':
	    PACK_LENGTH_ADJUST_SIZE(sizeof(double));
	    while (len-- > 0) {
		DOUBLE_CONVWITH(tmp);
		memcpy(tmp.buf, s, sizeof(double));
		s += sizeof(double);
		NTOHD(tmp);
		UNPACK_PUSH(DBL2NUM(tmp.d));
	    }
	    PACK_ITEM_ADJUST();
	    break;

	  case 'U':
	    /* UTF-8 characters: utf8_to_uv decodes one character and
	     * reports the number of bytes it used through alen */
	    if (len > send - s) len = send - s;
	    while (len > 0 && s < send) {
		long alen = send - s;
		unsigned long l;

		l = utf8_to_uv(s, &alen);
		s += alen; len--;
		UNPACK_PUSH(ULONG2NUM(l));
	    }
	    break;

	  case 'u':
	    /* UU-encoded data: decoded line by line into a buffer sized
	     * to 3/4 of the remaining input */
	    {
		VALUE buf = infected_str_new(0, (send - s)*3/4, str);
		char *ptr = RSTRING_PTR(buf);
		long total = 0;

		while (s < send && (unsigned char)*s > ' ' && (unsigned char)*s < 'a') {
		    long a,b,c,d;
		    char hunk[3];

		    /* first character of a line encodes its decoded length */
		    len = ((unsigned char)*s++ - ' ') & 077;

		    /* never write more than the buffer can hold */
		    total += len;
		    if (total > RSTRING_LEN(buf)) {
			len -= total - RSTRING_LEN(buf);
			total = RSTRING_LEN(buf);
		    }

		    while (len > 0) {
			long mlen = len > 3 ? 3 : len;

			/* four input characters carry 6 bits each; missing
			 * or out-of-range characters count as 0 */
			if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
			    a = ((unsigned char)*s++ - ' ') & 077;
			else
			    a = 0;
			if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
			    b = ((unsigned char)*s++ - ' ') & 077;
			else
			    b = 0;
			if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
			    c = ((unsigned char)*s++ - ' ') & 077;
			else
			    c = 0;
			if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
			    d = ((unsigned char)*s++ - ' ') & 077;
			else
			    d = 0;
			hunk[0] = (char)(a << 2 | b >> 4);
			hunk[1] = (char)(b << 4 | c >> 2);
			hunk[2] = (char)(c << 6 | d);
			memcpy(ptr, hunk, mlen);
			ptr += mlen;
			len -= mlen;
		    }
		    if (s < send && (unsigned char)*s != '\r' && *s != '\n')
			s++;	/* possible checksum byte */
		    if (s < send && *s == '\r') s++;
		    if (s < send && *s == '\n') s++;
		}

		rb_str_set_len(buf, total);
		UNPACK_PUSH(buf);
	    }
	    break;

	  case 'm':
	    /* base64: a count of 0 selects strict decoding that raises on
	     * any invalid input; any other count decodes leniently and
	     * skips characters that are not in the alphabet */
	    {
		VALUE buf = infected_str_new(0, (send - s + 3)*3/4, str); /* +3 is for skipping paddings */
		char *ptr = RSTRING_PTR(buf);
		int a = -1,b = -1,c = 0,d = 0;
		static signed char b64_xtable[256];

		/* build the reverse lookup table on first use: the static
		 * array starts zeroed, so a non-positive entry for '/' means
		 * it has not been filled yet (assumes b64_table gives '/' a
		 * non-zero index -- b64_table is defined outside this chunk) */
		if (b64_xtable['/'] <= 0) {
		    int i;

		    for (i = 0; i < 256; i++) {
			b64_xtable[i] = -1;
		    }
		    for (i = 0; i < 64; i++) {
			b64_xtable[(unsigned char)b64_table[i]] = (char)i;
		    }
		}
		if (len == 0) {
		    while (s < send) {
			a = b = c = d = -1;
			a = b64_xtable[(unsigned char)*s++];
			if (s >= send || a == -1) rb_raise(rb_eArgError, "invalid base64");
			b = b64_xtable[(unsigned char)*s++];
			if (s >= send || b == -1) rb_raise(rb_eArgError, "invalid base64");
			if (*s == '=') {
			    /* "==" is only accepted as the last two bytes */
			    if (s + 2 == send && *(s + 1) == '=') break;
			    rb_raise(rb_eArgError, "invalid base64");
			}
			c = b64_xtable[(unsigned char)*s++];
			if (s >= send || c == -1) rb_raise(rb_eArgError, "invalid base64");
			/* a single '=' is only accepted as the last byte */
			if (s + 1 == send && *s == '=') break;
			d = b64_xtable[(unsigned char)*s++];
			if (d == -1) rb_raise(rb_eArgError, "invalid base64");
			*ptr++ = castchar(a << 2 | b >> 4);
			*ptr++ = castchar(b << 4 | c >> 2);
			*ptr++ = castchar(c << 6 | d);
		    }
		    /* flush a padded final group; the bits that do not fit
		     * into the output must be zero */
		    if (c == -1) {
			*ptr++ = castchar(a << 2 | b >> 4);
			if (b & 0xf) rb_raise(rb_eArgError, "invalid base64");
		    }
		    else if (d == -1) {
			*ptr++ = castchar(a << 2 | b >> 4);
			*ptr++ = castchar(b << 4 | c >> 2);
			if (c & 0x3) rb_raise(rb_eArgError, "invalid base64");
		    }
		}
		else {
		    while (s < send) {
			a = b = c = d = -1;
			/* skip characters outside the alphabet before each
			 * of the four sextets; '=' ends the third and fourth */
			while ((a = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;}
			if (s >= send) break;
			s++;
			while ((b = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;}
			if (s >= send) break;
			s++;
			while ((c = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;}
			if (*s == '=' || s >= send) break;
			s++;
			while ((d = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;}
			if (*s == '=' || s >= send) break;
			s++;
			*ptr++ = castchar(a << 2 | b >> 4);
			*ptr++ = castchar(b << 4 | c >> 2);
			*ptr++ = castchar(c << 6 | d);
			a = -1;
		    }
		    /* emit what an incomplete final group still provides */
		    if (a != -1 && b != -1) {
			if (c == -1)
			    *ptr++ = castchar(a << 2 | b >> 4);
			else {
			    *ptr++ = castchar(a << 2 | b >> 4);
			    *ptr++ = castchar(b << 4 | c >> 2);
			}
		    }
		}
		rb_str_set_len(buf, ptr - RSTRING_PTR(buf));
		UNPACK_PUSH(buf);
	    }
	    break;

	  case 'M':
	    /* quoted-printable: "=XX" becomes one byte, "=\n" and "=\r\n"
	     * (soft line breaks) are dropped, everything else is copied */
	    {
		VALUE buf = infected_str_new(0, send - s, str);
		char *ptr = RSTRING_PTR(buf), *ss = s;	/* ss: end of the last fully decoded unit */
		int csum = 0;	/* OR of all decoded bytes, for the coderange below */
		int c1, c2;

		while (s < send) {
		    if (*s == '=') {
			if (++s == send) break;
			if (s+1 < send && *s == '\r' && *(s+1) == '\n')
			    s++;
			if (*s != '\n') {
			    if ((c1 = hex2num(*s)) == -1) break;
			    if (++s == send) break;
			    if ((c2 = hex2num(*s)) == -1) break;
			    csum |= *ptr++ = castchar(c1 << 4 | c2);
			}
		    }
		    else {
			csum |= *ptr++ = *s;
		    }
		    s++;
		    ss = s;
		}
		rb_str_set_len(buf, ptr - RSTRING_PTR(buf));
		/* input left over after a broken escape is appended verbatim */
		rb_str_buf_cat(buf, ss, send-ss);
		csum = ISASCII(csum) ? ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID;
		ENCODING_CODERANGE_SET(buf, rb_ascii8bit_encindex(), csum);
		UNPACK_PUSH(buf);
	    }
	    break;

	  case '@':
	    /* move the cursor to absolute offset len */
	    if (len > RSTRING_LEN(str))
		rb_raise(rb_eArgError, "@ outside of string");
	    s = RSTRING_PTR(str) + len;
	    break;

	  case 'X':
	    /* move the cursor back len bytes */
	    if (len > s - RSTRING_PTR(str))
		rb_raise(rb_eArgError, "X outside of string");
	    s -= len;
	    break;

	  case 'x':
	    /* move the cursor forward len bytes */
	    if (len > send - s)
		rb_raise(rb_eArgError, "x outside of string");
	    s += len;
	    break;

	  case 'P':
	    /* one raw pointer; it is only accepted when it equals the
	     * buffer of a string found in str's associated array */
	    if (sizeof(char *) <= (size_t)(send - s)) {
		VALUE tmp = Qnil;
		char *t;

		memcpy(&t, s, sizeof(char *));
		s += sizeof(char *);

		if (t) {
		    VALUE a;
		    const VALUE *p, *pend;	/* shadow the format cursors within this block */

		    if (!(a = str_associated(str))) {
			rb_raise(rb_eArgError, "no associated pointer");
		    }
		    p = RARRAY_CONST_PTR(a);
		    pend = p + RARRAY_LEN(a);
		    while (p < pend) {
			if (RB_TYPE_P(*p, T_STRING) && RSTRING_PTR(*p) == t) {
			    /* a count shorter than the string yields a
			     * copy of its first len bytes */
			    if (len < RSTRING_LEN(*p)) {
				tmp = rb_tainted_str_new(t, len);
				str_associate(tmp, a);
			    }
			    else {
				tmp = *p;
			    }
			    break;
			}
			p++;
		    }
		    if (p == pend) {
			rb_raise(rb_eArgError, "non associated pointer");
		    }
		}
		UNPACK_PUSH(tmp);
	    }
	    break;

	  case 'p':
	    /* len raw pointers, each resolved against str's associated
	     * array like 'P'; a null pointer yields nil */
	    if (len > (long)((send - s) / sizeof(char *)))
		len = (send - s) / sizeof(char *);
	    while (len-- > 0) {
		if ((size_t)(send - s) < sizeof(char *))
		    break;
		else {
		    VALUE tmp = Qnil;
		    char *t;

		    memcpy(&t, s, sizeof(char *));
		    s += sizeof(char *);

		    if (t) {
			VALUE a;
			const VALUE *p, *pend;	/* shadow the format cursors within this block */

			if (!(a = str_associated(str))) {
			    rb_raise(rb_eArgError, "no associated pointer");
			}
			p = RARRAY_CONST_PTR(a);
			pend = p + RARRAY_LEN(a);
			while (p < pend) {
			    if (RB_TYPE_P(*p, T_STRING) && RSTRING_PTR(*p) == t) {
				tmp = *p;
				break;
			    }
			    p++;
			}
			if (p == pend) {
			    rb_raise(rb_eArgError, "non associated pointer");
			}
		    }
		    UNPACK_PUSH(tmp);
		}
	    }
	    break;

	  case 'w':
	    /* BER-compressed integers: bytes with the high bit set continue
	     * the current number, a byte without it ends the number */
	    {
                char *s0 = s;	/* start of the number being collected */
                while (len > 0 && s < send) {
                    if (*s & 0x80) {
                        s++;
                    }
                    else {
                        s++;
                        UNPACK_PUSH(rb_integer_unpack(s0, s-s0, 1, 1, INTEGER_PACK_BIG_ENDIAN));
                        len--;
                        s0 = s;
                    }
                }
	    }
	    break;

	  default:
            unknown_directive("unpack", type, fmt);
	    break;
	}
    }

    return ary;
}
1791 
1792 /*
1793  *  call-seq:
1794  *     str.unpack(format)    ->  anArray
1795  *
1796  *  Decodes <i>str</i> (which may contain binary data) according to the
1797  *  format string, returning an array of each value extracted. The
1798  *  format string consists of a sequence of single-character directives,
1799  *  summarized in the table at the end of this entry.
1800  *  Each directive may be followed
1801  *  by a number, indicating the number of times to repeat with this
1802  *  directive. An asterisk (``<code>*</code>'') will use up all
 *  remaining elements. The directives <code>sSiIlLqQjJ</code> may each be
1804  *  followed by an underscore (``<code>_</code>'') or
1805  *  exclamation mark (``<code>!</code>'') to use the underlying
1806  *  platform's native size for the specified type; otherwise, it uses a
1807  *  platform-independent consistent size. Spaces are ignored in the
1808  *  format string. See also <code>String#unpack1</code>,  <code>Array#pack</code>.
1809  *
1810  *     "abc \0\0abc \0\0".unpack('A6Z6')   #=> ["abc", "abc "]
1811  *     "abc \0\0".unpack('a3a3')           #=> ["abc", " \000\000"]
1812  *     "abc \0abc \0".unpack('Z*Z*')       #=> ["abc ", "abc "]
1813  *     "aa".unpack('b8B8')                 #=> ["10000110", "01100001"]
1814  *     "aaa".unpack('h2H2c')               #=> ["16", "61", 97]
1815  *     "\xfe\xff\xfe\xff".unpack('sS')     #=> [-2, 65534]
1816  *     "now=20is".unpack('M*')             #=> ["now is"]
1817  *     "whole".unpack('xax2aX2aX1aX2a')    #=> ["h", "e", "l", "l", "o"]
1818  *
1819  *  This table summarizes the various formats and the Ruby classes
1820  *  returned by each.
1821  *
1822  *   Integer       |         |
1823  *   Directive     | Returns | Meaning
1824  *   ------------------------------------------------------------------
1825  *   C             | Integer | 8-bit unsigned (unsigned char)
1826  *   S             | Integer | 16-bit unsigned, native endian (uint16_t)
1827  *   L             | Integer | 32-bit unsigned, native endian (uint32_t)
1828  *   Q             | Integer | 64-bit unsigned, native endian (uint64_t)
1829  *   J             | Integer | pointer width unsigned, native endian (uintptr_t)
1830  *                 |         |
1831  *   c             | Integer | 8-bit signed (signed char)
1832  *   s             | Integer | 16-bit signed, native endian (int16_t)
1833  *   l             | Integer | 32-bit signed, native endian (int32_t)
1834  *   q             | Integer | 64-bit signed, native endian (int64_t)
1835  *   j             | Integer | pointer width signed, native endian (intptr_t)
1836  *                 |         |
1837  *   S_ S!         | Integer | unsigned short, native endian
1838  *   I I_ I!       | Integer | unsigned int, native endian
1839  *   L_ L!         | Integer | unsigned long, native endian
1840  *   Q_ Q!         | Integer | unsigned long long, native endian (ArgumentError
1841  *                 |         | if the platform has no long long type.)
 *   J!            | Integer | uintptr_t, native endian (same as J)
1843  *                 |         |
1844  *   s_ s!         | Integer | signed short, native endian
1845  *   i i_ i!       | Integer | signed int, native endian
1846  *   l_ l!         | Integer | signed long, native endian
1847  *   q_ q!         | Integer | signed long long, native endian (ArgumentError
1848  *                 |         | if the platform has no long long type.)
 *   j!            | Integer | intptr_t, native endian (same as j)
1850  *                 |         |
1851  *   S> s> S!> s!> | Integer | same as the directives without ">" except
1852  *   L> l> L!> l!> |         | big endian
1853  *   I!> i!>       |         |
1854  *   Q> q> Q!> q!> |         | "S>" is same as "n"
1855  *   J> j> J!> j!> |         | "L>" is same as "N"
1856  *                 |         |
1857  *   S< s< S!< s!< | Integer | same as the directives without "<" except
1858  *   L< l< L!< l!< |         | little endian
1859  *   I!< i!<       |         |
1860  *   Q< q< Q!< q!< |         | "S<" is same as "v"
1861  *   J< j< J!< j!< |         | "L<" is same as "V"
1862  *                 |         |
1863  *   n             | Integer | 16-bit unsigned, network (big-endian) byte order
1864  *   N             | Integer | 32-bit unsigned, network (big-endian) byte order
1865  *   v             | Integer | 16-bit unsigned, VAX (little-endian) byte order
1866  *   V             | Integer | 32-bit unsigned, VAX (little-endian) byte order
1867  *                 |         |
1868  *   U             | Integer | UTF-8 character
1869  *   w             | Integer | BER-compressed integer (see Array.pack)
1870  *
1871  *   Float        |         |
1872  *   Directive    | Returns | Meaning
1873  *   -----------------------------------------------------------------
1874  *   D d          | Float   | double-precision, native format
1875  *   F f          | Float   | single-precision, native format
1876  *   E            | Float   | double-precision, little-endian byte order
1877  *   e            | Float   | single-precision, little-endian byte order
1878  *   G            | Float   | double-precision, network (big-endian) byte order
1879  *   g            | Float   | single-precision, network (big-endian) byte order
1880  *
1881  *   String       |         |
1882  *   Directive    | Returns | Meaning
1883  *   -----------------------------------------------------------------
1884  *   A            | String  | arbitrary binary string (remove trailing nulls and ASCII spaces)
1885  *   a            | String  | arbitrary binary string
1886  *   Z            | String  | null-terminated string
1887  *   B            | String  | bit string (MSB first)
1888  *   b            | String  | bit string (LSB first)
1889  *   H            | String  | hex string (high nibble first)
1890  *   h            | String  | hex string (low nibble first)
1891  *   u            | String  | UU-encoded string
1892  *   M            | String  | quoted-printable, MIME encoding (see RFC2045)
1893  *   m            | String  | base64 encoded string (RFC 2045) (default)
1894  *                |         | base64 encoded string (RFC 4648) if followed by 0
1895  *   P            | String  | pointer to a structure (fixed-length string)
1896  *   p            | String  | pointer to a null-terminated string
1897  *
1898  *   Misc.        |         |
1899  *   Directive    | Returns | Meaning
1900  *   -----------------------------------------------------------------
1901  *   @            | ---     | skip to the offset given by the length argument
1902  *   X            | ---     | skip backward one byte
1903  *   x            | ---     | skip forward one byte
1904  *
1905  *  HISTORY
1906  *
 *  * J, J!, j, and j! are available since Ruby 2.3.
1908  *  * Q_, Q!, q_, and q! are available since Ruby 2.1.
1909  *  * I!<, i!<, I!>, and i!> are available since Ruby 1.9.3.
1910  */
1911 
1912 static VALUE
pack_unpack(VALUE str,VALUE fmt)1913 pack_unpack(VALUE str, VALUE fmt)
1914 {
1915     int mode = rb_block_given_p() ? UNPACK_BLOCK : UNPACK_ARRAY;
1916     return pack_unpack_internal(str, fmt, mode);
1917 }
1918 
1919 /*
1920  *  call-seq:
1921  *     str.unpack1(format)    ->  obj
1922  *
1923  *  Decodes <i>str</i> (which may contain binary data) according to the
1924  *  format string, returning the first value extracted.
1925  *  See also <code>String#unpack</code>, <code>Array#pack</code>.
1926  */
1927 
1928 static VALUE
pack_unpack1(VALUE str,VALUE fmt)1929 pack_unpack1(VALUE str, VALUE fmt)
1930 {
1931     return pack_unpack_internal(str, fmt, UNPACK_1);
1932 }
1933 
1934 int
rb_uv_to_utf8(char buf[6],unsigned long uv)1935 rb_uv_to_utf8(char buf[6], unsigned long uv)
1936 {
1937     if (uv <= 0x7f) {
1938 	buf[0] = (char)uv;
1939 	return 1;
1940     }
1941     if (uv <= 0x7ff) {
1942 	buf[0] = castchar(((uv>>6)&0xff)|0xc0);
1943 	buf[1] = castchar((uv&0x3f)|0x80);
1944 	return 2;
1945     }
1946     if (uv <= 0xffff) {
1947 	buf[0] = castchar(((uv>>12)&0xff)|0xe0);
1948 	buf[1] = castchar(((uv>>6)&0x3f)|0x80);
1949 	buf[2] = castchar((uv&0x3f)|0x80);
1950 	return 3;
1951     }
1952     if (uv <= 0x1fffff) {
1953 	buf[0] = castchar(((uv>>18)&0xff)|0xf0);
1954 	buf[1] = castchar(((uv>>12)&0x3f)|0x80);
1955 	buf[2] = castchar(((uv>>6)&0x3f)|0x80);
1956 	buf[3] = castchar((uv&0x3f)|0x80);
1957 	return 4;
1958     }
1959     if (uv <= 0x3ffffff) {
1960 	buf[0] = castchar(((uv>>24)&0xff)|0xf8);
1961 	buf[1] = castchar(((uv>>18)&0x3f)|0x80);
1962 	buf[2] = castchar(((uv>>12)&0x3f)|0x80);
1963 	buf[3] = castchar(((uv>>6)&0x3f)|0x80);
1964 	buf[4] = castchar((uv&0x3f)|0x80);
1965 	return 5;
1966     }
1967     if (uv <= 0x7fffffff) {
1968 	buf[0] = castchar(((uv>>30)&0xff)|0xfc);
1969 	buf[1] = castchar(((uv>>24)&0x3f)|0x80);
1970 	buf[2] = castchar(((uv>>18)&0x3f)|0x80);
1971 	buf[3] = castchar(((uv>>12)&0x3f)|0x80);
1972 	buf[4] = castchar(((uv>>6)&0x3f)|0x80);
1973 	buf[5] = castchar((uv&0x3f)|0x80);
1974 	return 6;
1975     }
1976     rb_raise(rb_eRangeError, "pack(U): value out of range");
1977 
1978     UNREACHABLE_RETURN(Qnil);
1979 }
1980 
/*
 * Smallest code point accepted from a sequence of a given length: the
 * trailing comment on each entry is the sequence length n, stored at index
 * n-1.  utf8_to_uv rejects a decoded value below the entry for its length
 * as a "redundant UTF-8 sequence".
 */
static const unsigned long utf8_limits[] = {
    0x0,			/* 1 */
    0x80,			/* 2 */
    0x800,			/* 3 */
    0x10000,			/* 4 */
    0x200000,			/* 5 */
    0x4000000,			/* 6 */
    0x80000000,			/* 7 */
};
1990 
1991 static unsigned long
utf8_to_uv(const char * p,long * lenp)1992 utf8_to_uv(const char *p, long *lenp)
1993 {
1994     int c = *p++ & 0xff;
1995     unsigned long uv = c;
1996     long n;
1997 
1998     if (!(uv & 0x80)) {
1999 	*lenp = 1;
2000         return uv;
2001     }
2002     if (!(uv & 0x40)) {
2003 	*lenp = 1;
2004 	rb_raise(rb_eArgError, "malformed UTF-8 character");
2005     }
2006 
2007     if      (!(uv & 0x20)) { n = 2; uv &= 0x1f; }
2008     else if (!(uv & 0x10)) { n = 3; uv &= 0x0f; }
2009     else if (!(uv & 0x08)) { n = 4; uv &= 0x07; }
2010     else if (!(uv & 0x04)) { n = 5; uv &= 0x03; }
2011     else if (!(uv & 0x02)) { n = 6; uv &= 0x01; }
2012     else {
2013 	*lenp = 1;
2014 	rb_raise(rb_eArgError, "malformed UTF-8 character");
2015     }
2016     if (n > *lenp) {
2017 	rb_raise(rb_eArgError, "malformed UTF-8 character (expected %ld bytes, given %ld bytes)",
2018 		 n, *lenp);
2019     }
2020     *lenp = n--;
2021     if (n != 0) {
2022 	while (n--) {
2023 	    c = *p++ & 0xff;
2024 	    if ((c & 0xc0) != 0x80) {
2025 		*lenp -= n + 1;
2026 		rb_raise(rb_eArgError, "malformed UTF-8 character");
2027 	    }
2028 	    else {
2029 		c &= 0x3f;
2030 		uv = uv << 6 | c;
2031 	    }
2032 	}
2033     }
2034     n = *lenp - 1;
2035     if (uv < utf8_limits[n]) {
2036 	rb_raise(rb_eArgError, "redundant UTF-8 sequence");
2037     }
2038     return uv;
2039 }
2040 
/*
 * Registers the methods implemented in this file -- Array#pack,
 * String#unpack and String#unpack1 -- and creates the internal ID stored in
 * id_associated.
 */
void
Init_pack(void)
{
    rb_define_method(rb_cArray, "pack", pack_pack, -1);
    rb_define_method(rb_cString, "unpack", pack_unpack, 1);
    rb_define_method(rb_cString, "unpack1", pack_unpack1, 1);

    id_associated = rb_make_internal_id();
}
2050