1 /*    pp_pack.c
2  *
3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
5  *
6  *    You may distribute under the terms of either the GNU General Public
7  *    License or the Artistic License, as specified in the README file.
8  *
9  */
10 
11 /*
12  * He still hopefully carried some of his gear in his pack: a small tinder-box,
13  * two small shallow pans, the smaller fitting into the larger; inside them a
14  * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15  * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16  * some salt.
17  *
18  *     [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
19  */
20 
21 /* This file contains pp ("push/pop") functions that
22  * execute the opcodes that make up a perl program. A typical pp function
23  * expects to find its arguments on the stack, and usually pushes its
24  * results onto the stack, hence the 'pp' terminology. Each OP structure
25  * contains a pointer to the relevant pp_foo() function.
26  *
27  * This particular file just contains pp_pack() and pp_unpack(). See the
28  * other pp*.c files for the rest of the pp_ functions.
29  */
30 
31 #include "EXTERN.h"
32 #define PERL_IN_PP_PACK_C
33 #include "perl.h"
34 
35 /* Types used by pack/unpack */
36 typedef enum {
37   e_no_len,     /* no length  */
38   e_number,     /* number, [] */
39   e_star        /* asterisk   */
40 } howlen_t;
41 
42 typedef struct tempsym {
43   const char*    patptr;   /* current template char */
44   const char*    patend;   /* one after last char   */
45   const char*    grpbeg;   /* 1st char of ()-group  */
46   const char*    grpend;   /* end of ()-group       */
47   I32      code;     /* template code (!<>)   */
48   U32      flags;    /* /=4, comma=2, pack=1  */
49                      /*   and group modifiers */
50   SSize_t  length;   /* length/repeat count   */
51   howlen_t howlen;   /* how length is given   */
52   int      level;    /* () nesting level      */
53   STRLEN   strbeg;   /* offset of group start */
54   struct tempsym *previous; /* previous group */
55 } tempsym_t;
56 
57 #define TEMPSYM_INIT(symptr, p, e, f) \
58     STMT_START {	\
59 	(symptr)->patptr   = (p);	\
60 	(symptr)->patend   = (e);	\
61 	(symptr)->grpbeg   = NULL;	\
62 	(symptr)->grpend   = NULL;	\
63 	(symptr)->grpend   = NULL;	\
64 	(symptr)->code     = 0;		\
65 	(symptr)->length   = 0;		\
66 	(symptr)->howlen   = e_no_len;	\
67 	(symptr)->level    = 0;		\
68 	(symptr)->flags    = (f);	\
69 	(symptr)->strbeg   = 0;		\
70 	(symptr)->previous = NULL;	\
71    } STMT_END
72 
73 typedef union {
74     NV nv;
75     U8 bytes[sizeof(NV)];
76 } NV_bytes;
77 
78 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
79 typedef union {
80     long double ld;
81     U8 bytes[sizeof(long double)];
82 } ld_bytes;
83 #endif
84 
85 #ifndef CHAR_BIT
86 # define CHAR_BIT	8
87 #endif
88 /* Maximum number of bytes to which a byte can grow due to upgrade */
89 #define UTF8_EXPAND	2
90 
91 /*
92  * Offset for integer pack/unpack.
93  *
94  * On architectures where I16 and I32 aren't really 16 and 32 bits,
95  * which for now are all Crays, pack and unpack have to play games.
96  */
97 
98 /*
99  * These values are required for portability of pack() output.
100  * If they're not right on your machine, then pack() and unpack()
101  * wouldn't work right anyway; you'll need to apply the Cray hack.
102  * (I'd like to check them with #if, but you can't use sizeof() in
103  * the preprocessor.)  --???
104  */
105 /*
106     The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107     defines are now in config.h.  --Andy Dougherty  April 1998
108  */
109 #define SIZE16 2
110 #define SIZE32 4
111 
112 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
113    --jhi Feb 1999 */
114 
115 #if U16SIZE <= SIZE16 && U32SIZE <= SIZE32
116 #  define OFF16(p)     ((char *) (p))
117 #  define OFF32(p)     ((char *) (p))
118 #elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678    /* little-endian */
119 #  define OFF16(p)	((char*)(p))
120 #  define OFF32(p)	((char*)(p))
121 #elif BYTEORDER == 0x4321 || BYTEORDER == 0x87654321  /* big-endian */
122 #  define OFF16(p)	((char*)(p) + (sizeof(U16) - SIZE16))
123 #  define OFF32(p)	((char*)(p) + (sizeof(U32) - SIZE32))
124 #else
125 #  error "bad cray byte order"
126 #endif
127 
128 #define PUSH16(utf8, cur, p, needs_swap)                        \
129        PUSH_BYTES(utf8, cur, OFF16(p), SIZE16, needs_swap)
130 #define PUSH32(utf8, cur, p, needs_swap)                        \
131        PUSH_BYTES(utf8, cur, OFF32(p), SIZE32, needs_swap)
132 
133 #if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321  /* big-endian */
134 #  define NEEDS_SWAP(d)     (TYPE_ENDIANNESS(d) == TYPE_IS_LITTLE_ENDIAN)
135 #elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678  /* little-endian */
136 #  define NEEDS_SWAP(d)     (TYPE_ENDIANNESS(d) == TYPE_IS_BIG_ENDIAN)
137 #else
138 #  error "Unsupported byteorder"
139         /* Need to add code here to re-instate mixed endian support.
140            NEEDS_SWAP would need to hold a flag indicating which action to
141            take, and S_reverse_copy and the code in S_utf8_to_bytes would need
142            logic adding to deal with any mixed-endian transformations needed.
143         */
144 #endif
145 
146 /* Only to be used inside a loop (see the break) */
147 #define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype, needs_swap)	\
148 STMT_START {						\
149     if (UNLIKELY(utf8)) {                               \
150         if (!S_utf8_to_bytes(aTHX_ &s, strend,		\
151 	  (char *) (buf), len, datumtype)) break;	\
152     } else {						\
153         if (UNLIKELY(needs_swap))                       \
154             S_reverse_copy(s, (char *) (buf), len);     \
155         else                                            \
156             Copy(s, (char *) (buf), len, char);		\
157         s += len;					\
158     }							\
159 } STMT_END
160 
161 #define SHIFT16(utf8, s, strend, p, datumtype, needs_swap)              \
162        SHIFT_BYTES(utf8, s, strend, OFF16(p), SIZE16, datumtype, needs_swap)
163 
164 #define SHIFT32(utf8, s, strend, p, datumtype, needs_swap)              \
165        SHIFT_BYTES(utf8, s, strend, OFF32(p), SIZE32, datumtype, needs_swap)
166 
167 #define SHIFT_VAR(utf8, s, strend, var, datumtype, needs_swap)          \
168        SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype, needs_swap)
169 
170 #define PUSH_VAR(utf8, aptr, var, needs_swap)           \
171        PUSH_BYTES(utf8, aptr, &(var), sizeof(var), needs_swap)
172 
173 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
174 #define MAX_SUB_TEMPLATE_LEVEL 100
175 
176 /* flags (note that type modifiers can also be used as flags!) */
177 #define FLAG_WAS_UTF8	      0x40
178 #define FLAG_PARSE_UTF8       0x20	/* Parse as utf8 */
179 #define FLAG_UNPACK_ONLY_ONE  0x10
180 #define FLAG_DO_UTF8          0x08	/* The underlying string is utf8 */
181 #define FLAG_SLASH            0x04
182 #define FLAG_COMMA            0x02
183 #define FLAG_PACK             0x01
184 
185 STATIC SV *
S_mul128(pTHX_ SV * sv,U8 m)186 S_mul128(pTHX_ SV *sv, U8 m)
187 {
188   STRLEN          len;
189   char           *s = SvPV(sv, len);
190   char           *t;
191 
192   PERL_ARGS_ASSERT_MUL128;
193 
194   if (! memBEGINs(s, len, "0000")) {  /* need to grow sv */
195     SV * const tmpNew = newSVpvs("0000000000");
196 
197     sv_catsv(tmpNew, sv);
198     SvREFCNT_dec(sv);		/* free old sv */
199     sv = tmpNew;
200     s = SvPV(sv, len);
201   }
202   t = s + len - 1;
203   while (!*t)                   /* trailing '\0'? */
204     t--;
205   while (t > s) {
206     const U32 i = ((*t - '0') << 7) + m;
207     *(t--) = '0' + (char)(i % 10);
208     m = (char)(i / 10);
209   }
210   return (sv);
211 }
212 
213 /* Explosives and implosives. */
214 
215 #define ISUUCHAR(ch)    (NATIVE_TO_LATIN1(ch) >= NATIVE_TO_LATIN1(' ')  \
216                       && NATIVE_TO_LATIN1(ch) <  NATIVE_TO_LATIN1('a'))
217 
218 /* type modifiers */
219 #define TYPE_IS_SHRIEKING	0x100
220 #define TYPE_IS_BIG_ENDIAN	0x200
221 #define TYPE_IS_LITTLE_ENDIAN	0x400
222 #define TYPE_IS_PACK		0x800
223 #define TYPE_ENDIANNESS_MASK	(TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
224 #define TYPE_MODIFIERS(t)	((t) & ~0xFF)
225 #define TYPE_NO_MODIFIERS(t)	((t) & 0xFF)
226 
227 # define TYPE_ENDIANNESS(t)	((t) & TYPE_ENDIANNESS_MASK)
228 # define TYPE_NO_ENDIANNESS(t)	((t) & ~TYPE_ENDIANNESS_MASK)
229 
230 # define ENDIANNESS_ALLOWED_TYPES   "sSiIlLqQjJfFdDpP("
231 
232 #define PACK_SIZE_CANNOT_CSUM		0x80
233 #define PACK_SIZE_UNPREDICTABLE		0x40	/* Not a fixed size element */
234 #define PACK_SIZE_MASK			0x3F
235 
236 #include "packsizetables.inc"
237 
238 static void
S_reverse_copy(const char * src,char * dest,STRLEN len)239 S_reverse_copy(const char *src, char *dest, STRLEN len)
240 {
241     dest += len;
242     while (len--)
243         *--dest = *src++;
244 }
245 
246 STATIC U8
utf8_to_byte(pTHX_ const char ** s,const char * end,I32 datumtype)247 utf8_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
248 {
249     STRLEN retlen;
250     UV val;
251 
252     if (*s >= end) {
253 	goto croak;
254     }
255     val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
256 			 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
257     if (retlen == (STRLEN) -1)
258       croak:
259 	Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
260 		   (int) TYPE_NO_MODIFIERS(datumtype));
261     if (val >= 0x100) {
262 	Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
263 		       "Character in '%c' format wrapped in unpack",
264 		       (int) TYPE_NO_MODIFIERS(datumtype));
265 	val &= 0xff;
266     }
267     *s += retlen;
268     return (U8)val;
269 }
270 
271 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
272 	utf8_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
273 	*(U8 *)(s)++)
274 
275 STATIC bool
S_utf8_to_bytes(pTHX_ const char ** s,const char * end,const char * buf,SSize_t buf_len,I32 datumtype)276 S_utf8_to_bytes(pTHX_ const char **s, const char *end, const char *buf, SSize_t buf_len, I32 datumtype)
277 {
278     UV val;
279     STRLEN retlen;
280     const char *from = *s;
281     int bad = 0;
282     const U32 flags = ckWARN(WARN_UTF8) ?
283 	UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
284     const bool needs_swap = NEEDS_SWAP(datumtype);
285 
286     if (UNLIKELY(needs_swap))
287         buf += buf_len;
288 
289     for (;buf_len > 0; buf_len--) {
290 	if (from >= end) return FALSE;
291 	val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
292 	if (retlen == (STRLEN) -1) {
293 	    from += UTF8_SAFE_SKIP(from, end);
294 	    bad |= 1;
295 	} else from += retlen;
296 	if (val >= 0x100) {
297 	    bad |= 2;
298 	    val &= 0xff;
299 	}
300         if (UNLIKELY(needs_swap))
301             *(U8 *)--buf = (U8)val;
302         else
303             *(U8 *)buf++ = (U8)val;
304     }
305     /* We have enough characters for the buffer. Did we have problems ? */
306     if (bad) {
307 	if (bad & 1) {
308 	    /* Rewalk the string fragment while warning */
309 	    const char *ptr;
310 	    const U32 flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
311 	    for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
312 		if (ptr >= end) break;
313 		utf8n_to_uvchr((U8 *) ptr, end-ptr, &retlen, flags);
314 	    }
315 	    if (from > end) from = end;
316 	}
317 	if ((bad & 2))
318 	    Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
319 				       WARN_PACK : WARN_UNPACK),
320 			   "Character(s) in '%c' format wrapped in %s",
321 			   (int) TYPE_NO_MODIFIERS(datumtype),
322 			   datumtype & TYPE_IS_PACK ? "pack" : "unpack");
323     }
324     *s = from;
325     return TRUE;
326 }
327 
328 STATIC char *
S_my_bytes_to_utf8(const U8 * start,STRLEN len,char * dest,const bool needs_swap)329 S_my_bytes_to_utf8(const U8 *start, STRLEN len, char *dest, const bool needs_swap) {
330     PERL_ARGS_ASSERT_MY_BYTES_TO_UTF8;
331 
332     if (UNLIKELY(needs_swap)) {
333         const U8 *p = start + len;
334         while (p-- > start) {
335             append_utf8_from_native_byte(*p, (U8 **) & dest);
336         }
337     } else {
338         const U8 * const end = start + len;
339         while (start < end) {
340             append_utf8_from_native_byte(*start, (U8 **) & dest);
341             start++;
342         }
343     }
344     return dest;
345 }
346 
347 #define PUSH_BYTES(utf8, cur, buf, len, needs_swap)             \
348 STMT_START {							\
349     if (UNLIKELY(utf8))	                                        \
350 	(cur) = my_bytes_to_utf8((U8 *) buf, len, (cur), needs_swap);       \
351     else {							\
352         if (UNLIKELY(needs_swap))                               \
353             S_reverse_copy((char *)(buf), cur, len);            \
354         else                                                    \
355             Copy(buf, cur, len, char);				\
356 	(cur) += (len);						\
357     }								\
358 } STMT_END
359 
360 #define SAFE_UTF8_EXPAND(var)	\
361 STMT_START {				\
362     if ((var) > SSize_t_MAX / UTF8_EXPAND) \
363         Perl_croak(aTHX_ "%s", "Out of memory during pack()"); \
364     (var) = (var) * UTF8_EXPAND; \
365 } STMT_END
366 
367 #define GROWING2(utf8, cat, start, cur, item_size, item_count)	\
368 STMT_START {							\
369     if (SSize_t_MAX / (item_size) < (item_count))		\
370         Perl_croak(aTHX_ "%s", "Out of memory during pack()");	\
371     GROWING((utf8), (cat), (start), (cur), (item_size) * (item_count)); \
372 } STMT_END
373 
374 #define GROWING(utf8, cat, start, cur, in_len)	\
375 STMT_START {					\
376     STRLEN glen = (in_len);			\
377     STRLEN catcur = (STRLEN)((cur) - (start));	\
378     if (utf8) SAFE_UTF8_EXPAND(glen);		\
379     if (SSize_t_MAX - glen < catcur)		\
380         Perl_croak(aTHX_ "%s", "Out of memory during pack()"); \
381     if (catcur + glen >= SvLEN(cat)) {	\
382 	(start) = sv_exp_grow(cat, glen);	\
383 	(cur) = (start) + SvCUR(cat);		\
384     }						\
385 } STMT_END
386 
387 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
388 STMT_START {					\
389     const STRLEN glen = (in_len);		\
390     STRLEN gl = glen;				\
391     if (utf8) SAFE_UTF8_EXPAND(gl);		\
392     if ((cur) + gl >= (start) + SvLEN(cat)) {	\
393         *cur = '\0';				\
394         SvCUR_set((cat), (cur) - (start));	\
395 	(start) = sv_exp_grow(cat, gl);		\
396 	(cur) = (start) + SvCUR(cat);		\
397     }						\
398     PUSH_BYTES(utf8, cur, buf, glen, 0);        \
399 } STMT_END
400 
401 #define PUSH_BYTE(utf8, s, byte)		\
402 STMT_START {					\
403     if (utf8) {					\
404 	const U8 au8 = (byte);			\
405 	(s) = my_bytes_to_utf8(&au8, 1, (s), 0);\
406     } else *(U8 *)(s)++ = (byte);		\
407 } STMT_END
408 
409 /* Only to be used inside a loop (see the break) */
410 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags)		\
411 STMT_START {							\
412     STRLEN retlen;						\
413     if (str >= end) break;					\
414     val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags);	\
415     if (retlen == (STRLEN) -1) {			        \
416 	*cur = '\0';						\
417 	Perl_croak(aTHX_ "Malformed UTF-8 string in pack");	\
418     }								\
419     str += retlen;						\
420 } STMT_END
421 
_action(const tempsym_t * symptr)422 static const char *_action( const tempsym_t* symptr )
423 {
424     return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
425 }
426 
427 /* Returns the sizeof() struct described by pat */
428 STATIC SSize_t
S_measure_struct(pTHX_ tempsym_t * symptr)429 S_measure_struct(pTHX_ tempsym_t* symptr)
430 {
431     SSize_t total = 0;
432 
433     PERL_ARGS_ASSERT_MEASURE_STRUCT;
434 
435     while (next_symbol(symptr)) {
436 	SSize_t len, size;
437 
438         switch (symptr->howlen) {
439 	  case e_star:
440    	    Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
441                         _action( symptr ) );
442 
443 	  default:
444 	    /* e_no_len and e_number */
445 	    len = symptr->length;
446 	    break;
447         }
448 
449 	size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
450 	if (!size) {
451             SSize_t star;
452 	    /* endianness doesn't influence the size of a type */
453 	    switch(TYPE_NO_ENDIANNESS(symptr->code)) {
454 	    default:
455 		Perl_croak(aTHX_ "Invalid type '%c' in %s",
456 			   (int)TYPE_NO_MODIFIERS(symptr->code),
457                            _action( symptr ) );
458 	    case '.' | TYPE_IS_SHRIEKING:
459 	    case '@' | TYPE_IS_SHRIEKING:
460 	    case '@':
461 	    case '.':
462 	    case '/':
463 	    case 'U':			/* XXXX Is it correct? */
464 	    case 'w':
465 	    case 'u':
466 		Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
467 			   (int) TYPE_NO_MODIFIERS(symptr->code),
468                            _action( symptr ) );
469 	    case '%':
470 		size = 0;
471 		break;
472 	    case '(':
473 	    {
474 		tempsym_t savsym = *symptr;
475 		symptr->patptr = savsym.grpbeg;
476 		symptr->patend = savsym.grpend;
477 		/* XXXX Theoretically, we need to measure many times at
478 		   different positions, since the subexpression may contain
479 		   alignment commands, but be not of aligned length.
480 		   Need to detect this and croak().  */
481 		size = measure_struct(symptr);
482 		*symptr = savsym;
483 		break;
484 	    }
485 	    case 'X' | TYPE_IS_SHRIEKING:
486 		/* XXXX Is this useful?  Then need to treat MEASURE_BACKWARDS.
487 		 */
488 		if (!len)		/* Avoid division by 0 */
489 		    len = 1;
490 		len = total % len;	/* Assumed: the start is aligned. */
491 		/* FALLTHROUGH */
492 	    case 'X':
493 		size = -1;
494 		if (total < len)
495                     Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
496 		break;
497 	    case 'x' | TYPE_IS_SHRIEKING:
498 		if (!len)		/* Avoid division by 0 */
499 		    len = 1;
500 		star = total % len;	/* Assumed: the start is aligned. */
501 		if (star)		/* Other portable ways? */
502 		    len = len - star;
503 		else
504 		    len = 0;
505 		/* FALLTHROUGH */
506 	    case 'x':
507 	    case 'A':
508 	    case 'Z':
509 	    case 'a':
510 		size = 1;
511 		break;
512 	    case 'B':
513 	    case 'b':
514 		len = (len + 7)/8;
515 		size = 1;
516 		break;
517 	    case 'H':
518 	    case 'h':
519 		len = (len + 1)/2;
520 		size = 1;
521 		break;
522 
523 	    case 'P':
524 		len = 1;
525 		size = sizeof(char*);
526 		break;
527 	    }
528 	}
529 	total += len * size;
530     }
531     return total;
532 }
533 
534 
535 /* locate matching closing parenthesis or bracket
536  * returns char pointer to char after match, or NULL
537  */
538 STATIC const char *
S_group_end(pTHX_ const char * patptr,const char * patend,char ender)539 S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
540 {
541     PERL_ARGS_ASSERT_GROUP_END;
542 
543     while (patptr < patend) {
544 	const char c = *patptr++;
545 
546 	if (isSPACE(c))
547 	    continue;
548 	else if (c == ender)
549 	    return patptr-1;
550 	else if (c == '#') {
551 	    while (patptr < patend && *patptr != '\n')
552 		patptr++;
553 	    continue;
554 	} else if (c == '(')
555 	    patptr = group_end(patptr, patend, ')') + 1;
556 	else if (c == '[')
557 	    patptr = group_end(patptr, patend, ']') + 1;
558     }
559     Perl_croak(aTHX_ "No group ending character '%c' found in template",
560                ender);
561     NOT_REACHED; /* NOTREACHED */
562 }
563 
564 
565 /* Convert unsigned decimal number to binary.
566  * Expects a pointer to the first digit and address of length variable
567  * Advances char pointer to 1st non-digit char and returns number
568  */
569 STATIC const char *
S_get_num(pTHX_ const char * patptr,SSize_t * lenptr)570 S_get_num(pTHX_ const char *patptr, SSize_t *lenptr )
571 {
572   SSize_t len = *patptr++ - '0';
573 
574   PERL_ARGS_ASSERT_GET_NUM;
575 
576   while (isDIGIT(*patptr)) {
577     SSize_t nlen = (len * 10) + (*patptr++ - '0');
578     if (nlen < 0 || nlen/10 != len)
579       Perl_croak(aTHX_ "pack/unpack repeat count overflow");
580     len = nlen;
581   }
582   *lenptr = len;
583   return patptr;
584 }
585 
586 /* The marvellous template parsing routine: Using state stored in *symptr,
587  * locates next template code and count
588  */
589 STATIC bool
S_next_symbol(pTHX_ tempsym_t * symptr)590 S_next_symbol(pTHX_ tempsym_t* symptr )
591 {
592   const char* patptr = symptr->patptr;
593   const char* const patend = symptr->patend;
594 
595   PERL_ARGS_ASSERT_NEXT_SYMBOL;
596 
597   symptr->flags &= ~FLAG_SLASH;
598 
599   while (patptr < patend) {
600     if (isSPACE(*patptr))
601       patptr++;
602     else if (*patptr == '#') {
603       patptr++;
604       while (patptr < patend && *patptr != '\n')
605 	patptr++;
606       if (patptr < patend)
607 	patptr++;
608     } else {
609       /* We should have found a template code */
610       I32 code = *patptr++ & 0xFF;
611       U32 inherited_modifiers = 0;
612 
613       if (code == ','){ /* grandfather in commas but with a warning */
614 	if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
615           symptr->flags |= FLAG_COMMA;
616 	  Perl_warner(aTHX_ packWARN(WARN_UNPACK),
617 	 	      "Invalid type ',' in %s", _action( symptr ) );
618         }
619 	continue;
620       }
621 
622       /* for '(', skip to ')' */
623       if (code == '(') {
624         if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
625           Perl_croak(aTHX_ "()-group starts with a count in %s",
626                         _action( symptr ) );
627         symptr->grpbeg = patptr;
628         patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
629         if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
630 	  Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
631                         _action( symptr ) );
632       }
633 
634       /* look for group modifiers to inherit */
635       if (TYPE_ENDIANNESS(symptr->flags)) {
636         if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
637           inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
638       }
639 
640       /* look for modifiers */
641       while (patptr < patend) {
642         const char *allowed;
643         I32 modifier;
644         switch (*patptr) {
645           case '!':
646             modifier = TYPE_IS_SHRIEKING;
647             allowed = "sSiIlLxXnNvV@.";
648             break;
649           case '>':
650             modifier = TYPE_IS_BIG_ENDIAN;
651             allowed = ENDIANNESS_ALLOWED_TYPES;
652             break;
653           case '<':
654             modifier = TYPE_IS_LITTLE_ENDIAN;
655             allowed = ENDIANNESS_ALLOWED_TYPES;
656             break;
657           default:
658             allowed = "";
659             modifier = 0;
660             break;
661         }
662 
663         if (modifier == 0)
664           break;
665 
666         if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
667           Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
668                         allowed, _action( symptr ) );
669 
670         if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
671           Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
672                      (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
673         else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
674                  TYPE_ENDIANNESS_MASK)
675           Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
676                      *patptr, _action( symptr ) );
677 
678         if ((code & modifier)) {
679 	    Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
680 			   "Duplicate modifier '%c' after '%c' in %s",
681 			   *patptr, (int) TYPE_NO_MODIFIERS(code),
682 			   _action( symptr ) );
683         }
684 
685         code |= modifier;
686         patptr++;
687       }
688 
689       /* inherit modifiers */
690       code |= inherited_modifiers;
691 
692       /* look for count and/or / */
693       if (patptr < patend) {
694 	if (isDIGIT(*patptr)) {
695  	  patptr = get_num( patptr, &symptr->length );
696           symptr->howlen = e_number;
697 
698         } else if (*patptr == '*') {
699           patptr++;
700           symptr->howlen = e_star;
701 
702         } else if (*patptr == '[') {
703           const char* lenptr = ++patptr;
704           symptr->howlen = e_number;
705           patptr = group_end( patptr, patend, ']' ) + 1;
706           /* what kind of [] is it? */
707           if (isDIGIT(*lenptr)) {
708             lenptr = get_num( lenptr, &symptr->length );
709             if( *lenptr != ']' )
710               Perl_croak(aTHX_ "Malformed integer in [] in %s",
711                             _action( symptr ) );
712           } else {
713             tempsym_t savsym = *symptr;
714             symptr->patend = patptr-1;
715             symptr->patptr = lenptr;
716             savsym.length = measure_struct(symptr);
717             *symptr = savsym;
718           }
719         } else {
720           symptr->howlen = e_no_len;
721           symptr->length = 1;
722         }
723 
724         /* try to find / */
725         while (patptr < patend) {
726           if (isSPACE(*patptr))
727             patptr++;
728           else if (*patptr == '#') {
729             patptr++;
730             while (patptr < patend && *patptr != '\n')
731 	      patptr++;
732             if (patptr < patend)
733 	      patptr++;
734           } else {
735             if (*patptr == '/') {
736               symptr->flags |= FLAG_SLASH;
737               patptr++;
738               if (patptr < patend &&
739                   (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
740                 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
741                             _action( symptr ) );
742             }
743             break;
744 	  }
745 	}
746       } else {
747         /* at end - no count, no / */
748         symptr->howlen = e_no_len;
749         symptr->length = 1;
750       }
751 
752       symptr->code = code;
753       symptr->patptr = patptr;
754       return TRUE;
755     }
756   }
757   symptr->patptr = patptr;
758   return FALSE;
759 }
760 
761 /*
762    There is no way to cleanly handle the case where we should process the
763    string per byte in its upgraded form while it's really in downgraded form
764    (e.g. estimates like strend-s as an upper bound for the number of
765    characters left wouldn't work). So if we foresee the need of this
766    (pattern starts with U or contains U0), we want to work on the encoded
767    version of the string. Users are advised to upgrade their pack string
768    themselves if they need to do a lot of unpacks like this on it
769 */
770 STATIC bool
need_utf8(const char * pat,const char * patend)771 need_utf8(const char *pat, const char *patend)
772 {
773     bool first = TRUE;
774 
775     PERL_ARGS_ASSERT_NEED_UTF8;
776 
777     while (pat < patend) {
778 	if (pat[0] == '#') {
779 	    pat++;
780 	    pat = (const char *) memchr(pat, '\n', patend-pat);
781 	    if (!pat) return FALSE;
782 	} else if (pat[0] == 'U') {
783 	    if (first || pat[1] == '0') return TRUE;
784 	} else first = FALSE;
785 	pat++;
786     }
787     return FALSE;
788 }
789 
790 STATIC char
first_symbol(const char * pat,const char * patend)791 first_symbol(const char *pat, const char *patend) {
792     PERL_ARGS_ASSERT_FIRST_SYMBOL;
793 
794     while (pat < patend) {
795 	if (pat[0] != '#') return pat[0];
796 	pat++;
797 	pat = (const char *) memchr(pat, '\n', patend-pat);
798 	if (!pat) return 0;
799 	pat++;
800     }
801     return 0;
802 }
803 
804 /*
805 
806 =head1 Pack and Unpack
807 
808 =for apidoc unpackstring
809 
810 The engine implementing the C<unpack()> Perl function.
811 
812 Using the template C<pat..patend>, this function unpacks the string
813 C<s..strend> into a number of mortal SVs, which it pushes onto the perl
814 argument (C<@_>) stack (so you will need to issue a C<PUTBACK> before and
815 C<SPAGAIN> after the call to this function).  It returns the number of
816 pushed elements.
817 
818 The C<strend> and C<patend> pointers should point to the byte following the
819 last character of each string.
820 
821 Although this function returns its values on the perl argument stack, it
822 doesn't take any parameters from that stack (and thus in particular
823 there's no need to do a C<PUSHMARK> before calling it, unlike L</call_pv> for
824 example).
825 
826 =cut */
827 
828 SSize_t
Perl_unpackstring(pTHX_ const char * pat,const char * patend,const char * s,const char * strend,U32 flags)829 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
830 {
831     tempsym_t sym;
832 
833     PERL_ARGS_ASSERT_UNPACKSTRING;
834 
835     if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
836     else if (need_utf8(pat, patend)) {
837 	/* We probably should try to avoid this in case a scalar context call
838 	   wouldn't get to the "U0" */
839 	STRLEN len = strend - s;
840 	s = (char *) bytes_to_utf8((U8 *) s, &len);
841 	SAVEFREEPV(s);
842 	strend = s + len;
843 	flags |= FLAG_DO_UTF8;
844     }
845 
846     if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
847 	flags |= FLAG_PARSE_UTF8;
848 
849     TEMPSYM_INIT(&sym, pat, patend, flags);
850 
851     return unpack_rec(&sym, s, s, strend, NULL );
852 }
853 
854 STATIC SSize_t
S_unpack_rec(pTHX_ tempsym_t * symptr,const char * s,const char * strbeg,const char * strend,const char ** new_s)855 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
856 {
857     dSP;
858     SV *sv = NULL;
859     const SSize_t start_sp_offset = SP - PL_stack_base;
860     howlen_t howlen;
861     SSize_t checksum = 0;
862     UV cuv = 0;
863     NV cdouble = 0.0;
864     const SSize_t bits_in_uv = CHAR_BIT * sizeof(cuv);
865     bool beyond = FALSE;
866     bool explicit_length;
867     const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
868     bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
869 
870     PERL_ARGS_ASSERT_UNPACK_REC;
871 
872     symptr->strbeg = s - strbeg;
873 
874     while (next_symbol(symptr)) {
875 	packprops_t props;
876 	SSize_t len;
877         I32 datumtype = symptr->code;
878         bool needs_swap;
879 	/* do first one only unless in list context
880 	   / is implemented by unpacking the count, then popping it from the
881 	   stack, so must check that we're not in the middle of a /  */
882         if ( unpack_only_one
883 	     && (SP - PL_stack_base == start_sp_offset + 1)
884 	     && (datumtype != '/') )   /* XXX can this be omitted */
885             break;
886 
887         switch (howlen = symptr->howlen) {
888 	  case e_star:
889 	    len = strend - strbeg;	/* long enough */
890 	    break;
891 	  default:
892 	    /* e_no_len and e_number */
893 	    len = symptr->length;
894 	    break;
895         }
896 
897         explicit_length = TRUE;
898       redo_switch:
899         beyond = s >= strend;
900 
901 	props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
902 	if (props) {
903 	    /* props nonzero means we can process this letter. */
904             const SSize_t size = props & PACK_SIZE_MASK;
905             const SSize_t howmany = (strend - s) / size;
906 	    if (len > howmany)
907 		len = howmany;
908 
909 	    if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
910 		if (len && unpack_only_one) len = 1;
911 		EXTEND(SP, len);
912 		EXTEND_MORTAL(len);
913 	    }
914 	}
915 
916         needs_swap = NEEDS_SWAP(datumtype);
917 
918 	switch(TYPE_NO_ENDIANNESS(datumtype)) {
919 	default:
920 	    Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
921 
922 	case '%':
923 	    if (howlen == e_no_len)
924 		len = 16;		/* len is not specified */
925 	    checksum = len;
926 	    cuv = 0;
927 	    cdouble = 0;
928 	    continue;
929 
930 	case '(':
931 	{
932             tempsym_t savsym = *symptr;
933             const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
934 	    symptr->flags |= group_modifiers;
935             symptr->patend = savsym.grpend;
936 	    symptr->previous = &savsym;
937             symptr->level++;
938 	    PUTBACK;
939 	    if (len && unpack_only_one) len = 1;
940 	    while (len--) {
941   	        symptr->patptr = savsym.grpbeg;
942 		if (utf8) symptr->flags |=  FLAG_PARSE_UTF8;
943 		else      symptr->flags &= ~FLAG_PARSE_UTF8;
944  	        unpack_rec(symptr, s, strbeg, strend, &s);
945                 if (s == strend && savsym.howlen == e_star)
946 		    break; /* No way to continue */
947 	    }
948 	    SPAGAIN;
949             savsym.flags = symptr->flags & ~group_modifiers;
950             *symptr = savsym;
951 	    break;
952 	}
953 	case '.' | TYPE_IS_SHRIEKING:
954 	case '.': {
955 	    const char *from;
956 	    SV *sv;
957 	    const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
958 	    if (howlen == e_star) from = strbeg;
959 	    else if (len <= 0) from = s;
960 	    else {
961 		tempsym_t *group = symptr;
962 
963 		while (--len && group) group = group->previous;
964 		from = group ? strbeg + group->strbeg : strbeg;
965 	    }
966 	    sv = from <= s ?
967 		newSVuv(  u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
968 		newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
969 	    mXPUSHs(sv);
970 	    break;
971 	}
972 	case '@' | TYPE_IS_SHRIEKING:
973 	case '@':
974 	    s = strbeg + symptr->strbeg;
975 	    if (utf8  && !(datumtype & TYPE_IS_SHRIEKING))
976 	    {
977 		while (len > 0) {
978 		    if (s >= strend)
979 			Perl_croak(aTHX_ "'@' outside of string in unpack");
980 		    s += UTF8SKIP(s);
981 		    len--;
982 		}
983 		if (s > strend)
984 		    Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
985 	    } else {
986 		if (strend-s < len)
987 		    Perl_croak(aTHX_ "'@' outside of string in unpack");
988 		s += len;
989 	    }
990 	    break;
991  	case 'X' | TYPE_IS_SHRIEKING:
992  	    if (!len)			/* Avoid division by 0 */
993  		len = 1;
994 	    if (utf8) {
995 		const char *hop, *last;
996 		SSize_t l = len;
997 		hop = last = strbeg;
998 		while (hop < s) {
999 		    hop += UTF8SKIP(hop);
1000 		    if (--l == 0) {
1001 			last = hop;
1002 			l = len;
1003 		    }
1004 		}
1005 		if (last > s)
1006 		    Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1007 		s = last;
1008 		break;
1009 	    }
1010 	    len = (s - strbeg) % len;
1011  	    /* FALLTHROUGH */
1012 	case 'X':
1013 	    if (utf8) {
1014 		while (len > 0) {
1015 		    if (s <= strbeg)
1016 			Perl_croak(aTHX_ "'X' outside of string in unpack");
1017 		    while (--s, UTF8_IS_CONTINUATION(*s)) {
1018 			if (s <= strbeg)
1019 			    Perl_croak(aTHX_ "'X' outside of string in unpack");
1020 		    }
1021 		    len--;
1022 		}
1023 	    } else {
1024 		if (len > s - strbeg)
1025 		    Perl_croak(aTHX_ "'X' outside of string in unpack" );
1026 		s -= len;
1027 	    }
1028 	    break;
1029  	case 'x' | TYPE_IS_SHRIEKING: {
1030             SSize_t ai32;
1031  	    if (!len)			/* Avoid division by 0 */
1032  		len = 1;
1033 	    if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1034 	    else      ai32 = (s - strbeg)                         % len;
1035 	    if (ai32 == 0) break;
1036 	    len -= ai32;
1037             }
1038  	    /* FALLTHROUGH */
1039 	case 'x':
1040 	    if (utf8) {
1041 		while (len>0) {
1042 		    if (s >= strend)
1043 			Perl_croak(aTHX_ "'x' outside of string in unpack");
1044 		    s += UTF8SKIP(s);
1045 		    len--;
1046 		}
1047 	    } else {
1048 		if (len > strend - s)
1049 		    Perl_croak(aTHX_ "'x' outside of string in unpack");
1050 		s += len;
1051 	    }
1052 	    break;
1053 	case '/':
1054 	    Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1055 
1056 	case 'A':
1057 	case 'Z':
1058 	case 'a':
1059 	    if (checksum) {
1060 		/* Preliminary length estimate is assumed done in 'W' */
1061 		if (len > strend - s) len = strend - s;
1062 		goto W_checksum;
1063 	    }
1064 	    if (utf8) {
1065 		SSize_t l;
1066 		const char *hop;
1067 		for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1068 		    if (hop >= strend) {
1069 			if (hop > strend)
1070 			    Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1071 			break;
1072 		    }
1073 		}
1074 		if (hop > strend)
1075 		    Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1076 		len = hop - s;
1077 	    } else if (len > strend - s)
1078 		len = strend - s;
1079 
1080 	    if (datumtype == 'Z') {
1081 		/* 'Z' strips stuff after first null */
1082 		const char *ptr, *end;
1083 		end = s + len;
1084 		for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1085 		sv = newSVpvn(s, ptr-s);
1086 		if (howlen == e_star) /* exact for 'Z*' */
1087 		    len = ptr-s + (ptr != strend ? 1 : 0);
1088 	    } else if (datumtype == 'A') {
1089 		/* 'A' strips both nulls and spaces */
1090 		const char *ptr;
1091 		if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1092                     for (ptr = s+len-1; ptr >= s; ptr--) {
1093                         if (   *ptr != 0
1094                             && !UTF8_IS_CONTINUATION(*ptr)
1095                             && !isSPACE_utf8_safe(ptr, strend))
1096                         {
1097                             break;
1098                         }
1099                     }
1100 		    if (ptr >= s) ptr += UTF8SKIP(ptr);
1101 		    else ptr++;
1102 		    if (ptr > s+len)
1103 			Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1104 		} else {
1105 		    for (ptr = s+len-1; ptr >= s; ptr--)
1106 			if (*ptr != 0 && !isSPACE(*ptr)) break;
1107 		    ptr++;
1108 		}
1109 		sv = newSVpvn(s, ptr-s);
1110 	    } else sv = newSVpvn(s, len);
1111 
1112 	    if (utf8) {
1113 		SvUTF8_on(sv);
1114 		/* Undo any upgrade done due to need_utf8() */
1115 		if (!(symptr->flags & FLAG_WAS_UTF8))
1116 		    sv_utf8_downgrade(sv, 0);
1117 	    }
1118 	    mXPUSHs(sv);
1119 	    s += len;
1120 	    break;
1121 	case 'B':
1122 	case 'b': {
1123 	    char *str;
1124 	    if (howlen == e_star || len > (strend - s) * 8)
1125 		len = (strend - s) * 8;
1126 	    if (checksum) {
1127 		if (utf8)
1128 		    while (len >= 8 && s < strend) {
1129 			cuv += PL_bitcount[utf8_to_byte(aTHX_ &s, strend, datumtype)];
1130 			len -= 8;
1131 		    }
1132 		else
1133 		    while (len >= 8) {
1134 			cuv += PL_bitcount[*(U8 *)s++];
1135 			len -= 8;
1136 		    }
1137 		if (len && s < strend) {
1138 		    U8 bits;
1139 		    bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1140 		    if (datumtype == 'b')
1141 			while (len-- > 0) {
1142 			    if (bits & 1) cuv++;
1143 			    bits >>= 1;
1144 			}
1145 		    else
1146 			while (len-- > 0) {
1147 			    if (bits & 0x80) cuv++;
1148 			    bits <<= 1;
1149 			}
1150 		}
1151 		break;
1152 	    }
1153 
1154 	    sv = sv_2mortal(newSV(len ? len : 1));
1155 	    SvPOK_on(sv);
1156 	    str = SvPVX(sv);
1157 	    if (datumtype == 'b') {
1158 		U8 bits = 0;
1159 		const SSize_t ai32 = len;
1160 		for (len = 0; len < ai32; len++) {
1161 		    if (len & 7) bits >>= 1;
1162 		    else if (utf8) {
1163 			if (s >= strend) break;
1164 			bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1165 		    } else bits = *(U8 *) s++;
1166 		    *str++ = bits & 1 ? '1' : '0';
1167 		}
1168 	    } else {
1169 		U8 bits = 0;
1170 		const SSize_t ai32 = len;
1171 		for (len = 0; len < ai32; len++) {
1172 		    if (len & 7) bits <<= 1;
1173 		    else if (utf8) {
1174 			if (s >= strend) break;
1175 			bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1176 		    } else bits = *(U8 *) s++;
1177 		    *str++ = bits & 0x80 ? '1' : '0';
1178 		}
1179 	    }
1180 	    *str = '\0';
1181 	    SvCUR_set(sv, str - SvPVX_const(sv));
1182 	    XPUSHs(sv);
1183 	    break;
1184 	}
1185 	case 'H':
1186 	case 'h': {
1187 	    char *str = NULL;
1188 	    /* Preliminary length estimate, acceptable for utf8 too */
1189 	    if (howlen == e_star || len > (strend - s) * 2)
1190 		len = (strend - s) * 2;
1191 	    if (!checksum) {
1192 		sv = sv_2mortal(newSV(len ? len : 1));
1193 		SvPOK_on(sv);
1194 		str = SvPVX(sv);
1195 	    }
1196 	    if (datumtype == 'h') {
1197 		U8 bits = 0;
1198 		SSize_t ai32 = len;
1199 		for (len = 0; len < ai32; len++) {
1200 		    if (len & 1) bits >>= 4;
1201 		    else if (utf8) {
1202 			if (s >= strend) break;
1203 			bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1204 		    } else bits = * (U8 *) s++;
1205 		    if (!checksum)
1206 			*str++ = PL_hexdigit[bits & 15];
1207 		}
1208 	    } else {
1209 		U8 bits = 0;
1210 		const SSize_t ai32 = len;
1211 		for (len = 0; len < ai32; len++) {
1212 		    if (len & 1) bits <<= 4;
1213 		    else if (utf8) {
1214 			if (s >= strend) break;
1215 			bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1216 		    } else bits = *(U8 *) s++;
1217 		    if (!checksum)
1218 			*str++ = PL_hexdigit[(bits >> 4) & 15];
1219 		}
1220 	    }
1221 	    if (!checksum) {
1222 		*str = '\0';
1223 		SvCUR_set(sv, str - SvPVX_const(sv));
1224 		XPUSHs(sv);
1225 	    }
1226 	    break;
1227 	}
1228 	case 'C':
1229             if (len == 0) {
1230                 if (explicit_length)
1231 		    /* Switch to "character" mode */
1232 		    utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1233 		break;
1234 	    }
1235 	    /* FALLTHROUGH */
1236 	case 'c':
1237 	    while (len-- > 0 && s < strend) {
1238 		int aint;
1239 		if (utf8)
1240 		  {
1241 		    STRLEN retlen;
1242 		    aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1243 				 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1244 		    if (retlen == (STRLEN) -1)
1245 			Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1246 		    s += retlen;
1247 		  }
1248 		else
1249 		  aint = *(U8 *)(s)++;
1250 		if (aint >= 128 && datumtype != 'C')	/* fake up signed chars */
1251 		    aint -= 256;
1252 		if (!checksum)
1253 		    mPUSHi(aint);
1254 		else if (checksum > bits_in_uv)
1255 		    cdouble += (NV)aint;
1256 		else
1257 		    cuv += aint;
1258 	    }
1259 	    break;
1260 	case 'W':
1261 	  W_checksum:
1262 	    if (utf8) {
1263 		while (len-- > 0 && s < strend) {
1264 		    STRLEN retlen;
1265 		    const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1266 					 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1267 		    if (retlen == (STRLEN) -1)
1268 			Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1269 		    s += retlen;
1270 		    if (!checksum)
1271 			mPUSHu(val);
1272 		    else if (checksum > bits_in_uv)
1273 			cdouble += (NV) val;
1274 		    else
1275 			cuv += val;
1276 		}
1277 	    } else if (!checksum)
1278 		while (len-- > 0) {
1279 		    const U8 ch = *(U8 *) s++;
1280 		    mPUSHu(ch);
1281 	    }
1282 	    else if (checksum > bits_in_uv)
1283 		while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1284 	    else
1285 		while (len-- > 0) cuv += *(U8 *) s++;
1286 	    break;
1287 	case 'U':
1288 	    if (len == 0) {
1289                 if (explicit_length && howlen != e_star) {
1290 		    /* Switch to "bytes in UTF-8" mode */
1291 		    if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1292 		    else
1293 			/* Should be impossible due to the need_utf8() test */
1294 			Perl_croak(aTHX_ "U0 mode on a byte string");
1295 		}
1296 		break;
1297 	    }
1298 	    if (len > strend - s) len = strend - s;
1299 	    if (!checksum) {
1300 		if (len && unpack_only_one) len = 1;
1301 		EXTEND(SP, len);
1302 		EXTEND_MORTAL(len);
1303 	    }
1304 	    while (len-- > 0 && s < strend) {
1305 		STRLEN retlen;
1306 		UV auv;
1307 		if (utf8) {
1308 		    U8 result[UTF8_MAXLEN+1];
1309 		    const char *ptr = s;
1310 		    STRLEN len;
1311 		    /* Bug: warns about bad utf8 even if we are short on bytes
1312 		       and will break out of the loop */
1313 		    if (!S_utf8_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1314 				      'U'))
1315 			break;
1316 		    len = UTF8SKIP(result);
1317 		    if (!S_utf8_to_bytes(aTHX_ &ptr, strend,
1318 				      (char *) &result[1], len-1, 'U')) break;
1319 		    auv = NATIVE_TO_UNI(utf8n_to_uvchr(result,
1320                                                        len,
1321                                                        &retlen,
1322                                                        UTF8_ALLOW_DEFAULT));
1323 		    s = ptr;
1324 		} else {
1325 		    auv = NATIVE_TO_UNI(utf8n_to_uvchr((U8*)s,
1326                                                        strend - s,
1327                                                        &retlen,
1328                                                        UTF8_ALLOW_DEFAULT));
1329 		    if (retlen == (STRLEN) -1)
1330 			Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1331 		    s += retlen;
1332 		}
1333 		if (!checksum)
1334 		    mPUSHu(auv);
1335 		else if (checksum > bits_in_uv)
1336 		    cdouble += (NV) auv;
1337 		else
1338 		    cuv += auv;
1339 	    }
1340 	    break;
1341 	case 's' | TYPE_IS_SHRIEKING:
1342 #if SHORTSIZE != SIZE16
1343 	    while (len-- > 0) {
1344 		short ashort;
1345                 SHIFT_VAR(utf8, s, strend, ashort, datumtype, needs_swap);
1346 		if (!checksum)
1347 		    mPUSHi(ashort);
1348 		else if (checksum > bits_in_uv)
1349 		    cdouble += (NV)ashort;
1350 		else
1351 		    cuv += ashort;
1352 	    }
1353 	    break;
1354 #else
1355 	    /* FALLTHROUGH */
1356 #endif
1357 	case 's':
1358 	    while (len-- > 0) {
1359 		I16 ai16;
1360 
1361 #if U16SIZE > SIZE16
1362 		ai16 = 0;
1363 #endif
1364                 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1365 #if U16SIZE > SIZE16
1366 		if (ai16 > 32767)
1367 		    ai16 -= 65536;
1368 #endif
1369 		if (!checksum)
1370 		    mPUSHi(ai16);
1371 		else if (checksum > bits_in_uv)
1372 		    cdouble += (NV)ai16;
1373 		else
1374 		    cuv += ai16;
1375 	    }
1376 	    break;
1377 	case 'S' | TYPE_IS_SHRIEKING:
1378 #if SHORTSIZE != SIZE16
1379 	    while (len-- > 0) {
1380 		unsigned short aushort;
1381 		SHIFT_VAR(utf8, s, strend, aushort, datumtype, needs_swap);
1382 		if (!checksum)
1383 		    mPUSHu(aushort);
1384 		else if (checksum > bits_in_uv)
1385 		    cdouble += (NV)aushort;
1386 		else
1387 		    cuv += aushort;
1388 	    }
1389 	    break;
1390 #else
1391             /* FALLTHROUGH */
1392 #endif
1393 	case 'v':
1394 	case 'n':
1395 	case 'S':
1396 	    while (len-- > 0) {
1397 		U16 au16;
1398 #if U16SIZE > SIZE16
1399 		au16 = 0;
1400 #endif
1401                 SHIFT16(utf8, s, strend, &au16, datumtype, needs_swap);
1402 		if (datumtype == 'n')
1403 		    au16 = PerlSock_ntohs(au16);
1404 		if (datumtype == 'v')
1405 		    au16 = vtohs(au16);
1406 		if (!checksum)
1407 		    mPUSHu(au16);
1408 		else if (checksum > bits_in_uv)
1409 		    cdouble += (NV) au16;
1410 		else
1411 		    cuv += au16;
1412 	    }
1413 	    break;
1414 	case 'v' | TYPE_IS_SHRIEKING:
1415 	case 'n' | TYPE_IS_SHRIEKING:
1416 	    while (len-- > 0) {
1417 		I16 ai16;
1418 # if U16SIZE > SIZE16
1419 		ai16 = 0;
1420 # endif
1421                 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1422                 /* There should never be any byte-swapping here.  */
1423                 assert(!TYPE_ENDIANNESS(datumtype));
1424 		if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1425 		    ai16 = (I16) PerlSock_ntohs((U16) ai16);
1426 		if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1427 		    ai16 = (I16) vtohs((U16) ai16);
1428 		if (!checksum)
1429 		    mPUSHi(ai16);
1430 		else if (checksum > bits_in_uv)
1431 		    cdouble += (NV) ai16;
1432 		else
1433 		    cuv += ai16;
1434 	    }
1435 	    break;
1436 	case 'i':
1437 	case 'i' | TYPE_IS_SHRIEKING:
1438 	    while (len-- > 0) {
1439 		int aint;
1440                 SHIFT_VAR(utf8, s, strend, aint, datumtype, needs_swap);
1441 		if (!checksum)
1442 		    mPUSHi(aint);
1443 		else if (checksum > bits_in_uv)
1444 		    cdouble += (NV)aint;
1445 		else
1446 		    cuv += aint;
1447 	    }
1448 	    break;
1449 	case 'I':
1450 	case 'I' | TYPE_IS_SHRIEKING:
1451 	    while (len-- > 0) {
1452 		unsigned int auint;
1453                 SHIFT_VAR(utf8, s, strend, auint, datumtype, needs_swap);
1454 		if (!checksum)
1455 		    mPUSHu(auint);
1456 		else if (checksum > bits_in_uv)
1457 		    cdouble += (NV)auint;
1458 		else
1459 		    cuv += auint;
1460 	    }
1461 	    break;
1462 	case 'j':
1463 	    while (len-- > 0) {
1464 		IV aiv;
1465                 SHIFT_VAR(utf8, s, strend, aiv, datumtype, needs_swap);
1466 		if (!checksum)
1467 		    mPUSHi(aiv);
1468 		else if (checksum > bits_in_uv)
1469 		    cdouble += (NV)aiv;
1470 		else
1471 		    cuv += aiv;
1472 	    }
1473 	    break;
1474 	case 'J':
1475 	    while (len-- > 0) {
1476 		UV auv;
1477                 SHIFT_VAR(utf8, s, strend, auv, datumtype, needs_swap);
1478 		if (!checksum)
1479 		    mPUSHu(auv);
1480 		else if (checksum > bits_in_uv)
1481 		    cdouble += (NV)auv;
1482 		else
1483 		    cuv += auv;
1484 	    }
1485 	    break;
1486 	case 'l' | TYPE_IS_SHRIEKING:
1487 #if LONGSIZE != SIZE32
1488 	    while (len-- > 0) {
1489 		long along;
1490                 SHIFT_VAR(utf8, s, strend, along, datumtype, needs_swap);
1491 		if (!checksum)
1492 		    mPUSHi(along);
1493 		else if (checksum > bits_in_uv)
1494 		    cdouble += (NV)along;
1495 		else
1496 		    cuv += along;
1497 	    }
1498 	    break;
1499 #else
1500 	    /* FALLTHROUGH */
1501 #endif
1502 	case 'l':
1503 	    while (len-- > 0) {
1504 		I32 ai32;
1505 #if U32SIZE > SIZE32
1506 		ai32 = 0;
1507 #endif
1508                 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
1509 #if U32SIZE > SIZE32
1510 		if (ai32 > 2147483647) ai32 -= 4294967296;
1511 #endif
1512 		if (!checksum)
1513 		    mPUSHi(ai32);
1514 		else if (checksum > bits_in_uv)
1515 		    cdouble += (NV)ai32;
1516 		else
1517 		    cuv += ai32;
1518 	    }
1519 	    break;
1520 	case 'L' | TYPE_IS_SHRIEKING:
1521 #if LONGSIZE != SIZE32
1522 	    while (len-- > 0) {
1523 		unsigned long aulong;
1524                 SHIFT_VAR(utf8, s, strend, aulong, datumtype, needs_swap);
1525 		if (!checksum)
1526 		    mPUSHu(aulong);
1527 		else if (checksum > bits_in_uv)
1528 		    cdouble += (NV)aulong;
1529 		else
1530 		    cuv += aulong;
1531 	    }
1532 	    break;
1533 #else
1534             /* FALLTHROUGH */
1535 #endif
1536 	case 'V':
1537 	case 'N':
1538 	case 'L':
1539 	    while (len-- > 0) {
1540 		U32 au32;
1541 #if U32SIZE > SIZE32
1542 		au32 = 0;
1543 #endif
1544                 SHIFT32(utf8, s, strend, &au32, datumtype, needs_swap);
1545 		if (datumtype == 'N')
1546 		    au32 = PerlSock_ntohl(au32);
1547 		if (datumtype == 'V')
1548 		    au32 = vtohl(au32);
1549 		if (!checksum)
1550 		    mPUSHu(au32);
1551 		else if (checksum > bits_in_uv)
1552 		    cdouble += (NV)au32;
1553 		else
1554 		    cuv += au32;
1555 	    }
1556 	    break;
1557 	case 'V' | TYPE_IS_SHRIEKING:
1558 	case 'N' | TYPE_IS_SHRIEKING:
1559 	    while (len-- > 0) {
1560 		I32 ai32;
1561 #if U32SIZE > SIZE32
1562 		ai32 = 0;
1563 #endif
1564                 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
1565                 /* There should never be any byte swapping here.  */
1566                 assert(!TYPE_ENDIANNESS(datumtype));
1567 		if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1568 		    ai32 = (I32)PerlSock_ntohl((U32)ai32);
1569 		if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1570 		    ai32 = (I32)vtohl((U32)ai32);
1571 		if (!checksum)
1572 		    mPUSHi(ai32);
1573 		else if (checksum > bits_in_uv)
1574 		    cdouble += (NV)ai32;
1575 		else
1576 		    cuv += ai32;
1577 	    }
1578 	    break;
1579 	case 'p':
1580 	    while (len-- > 0) {
1581 		const char *aptr;
1582                 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
1583 		/* newSVpv generates undef if aptr is NULL */
1584 		mPUSHs(newSVpv(aptr, 0));
1585 	    }
1586 	    break;
1587 	case 'w':
1588 	    {
1589 		UV auv = 0;
1590 		size_t bytes = 0;
1591 
1592 		while (len > 0 && s < strend) {
1593 		    U8 ch;
1594 		    ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1595 		    auv = (auv << 7) | (ch & 0x7f);
1596                     /* UTF8_IS_XXXXX not right here because this is a BER, not
1597                      * UTF-8 format - using constant 0x80 */
1598 		    if (ch < 0x80) {
1599 			bytes = 0;
1600 			mPUSHu(auv);
1601 			len--;
1602 			auv = 0;
1603 			continue;
1604 		    }
1605 		    if (++bytes >= sizeof(UV)) {	/* promote to string */
1606 			const char *t;
1607 
1608 			sv = Perl_newSVpvf(aTHX_ "%.*" UVuf,
1609                                                  (int)TYPE_DIGITS(UV), auv);
1610 			while (s < strend) {
1611 			    ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1612 			    sv = mul128(sv, (U8)(ch & 0x7f));
1613 			    if (!(ch & 0x80)) {
1614 				bytes = 0;
1615 				break;
1616 			    }
1617 			}
1618 			t = SvPV_nolen_const(sv);
1619 			while (*t == '0')
1620 			    t++;
1621 			sv_chop(sv, t);
1622 			mPUSHs(sv);
1623 			len--;
1624 			auv = 0;
1625 		    }
1626 		}
1627 		if ((s >= strend) && bytes)
1628 		    Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
1629 	    }
1630 	    break;
1631 	case 'P':
1632 	    if (symptr->howlen == e_star)
1633 	        Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
1634 	    EXTEND(SP, 1);
1635 	    if (s + sizeof(char*) <= strend) {
1636 		char *aptr;
1637                 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
1638 		/* newSVpvn generates undef if aptr is NULL */
1639 		PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
1640 	    }
1641 	    break;
1642 #if defined(HAS_QUAD) && IVSIZE >= 8
1643 	case 'q':
1644 	    while (len-- > 0) {
1645 		Quad_t aquad;
1646                 SHIFT_VAR(utf8, s, strend, aquad, datumtype, needs_swap);
1647 		if (!checksum)
1648                     mPUSHs(newSViv((IV)aquad));
1649 		else if (checksum > bits_in_uv)
1650 		    cdouble += (NV)aquad;
1651 		else
1652 		    cuv += aquad;
1653 	    }
1654 	    break;
1655 	case 'Q':
1656 	    while (len-- > 0) {
1657 		Uquad_t auquad;
1658                 SHIFT_VAR(utf8, s, strend, auquad, datumtype, needs_swap);
1659 		if (!checksum)
1660 		    mPUSHs(newSVuv((UV)auquad));
1661 		else if (checksum > bits_in_uv)
1662 		    cdouble += (NV)auquad;
1663 		else
1664 		    cuv += auquad;
1665 	    }
1666 	    break;
1667 #endif
1668 	/* float and double added gnb@melba.bby.oz.au 22/11/89 */
1669 	case 'f':
1670 	    while (len-- > 0) {
1671 		float afloat;
1672                 SHIFT_VAR(utf8, s, strend, afloat, datumtype, needs_swap);
1673 		if (!checksum)
1674 		    mPUSHn(afloat);
1675 		else
1676 		    cdouble += afloat;
1677 	    }
1678 	    break;
1679 	case 'd':
1680 	    while (len-- > 0) {
1681 		double adouble;
1682                 SHIFT_VAR(utf8, s, strend, adouble, datumtype, needs_swap);
1683 		if (!checksum)
1684 		    mPUSHn(adouble);
1685 		else
1686 		    cdouble += adouble;
1687 	    }
1688 	    break;
1689 	case 'F':
1690 	    while (len-- > 0) {
1691 		NV_bytes anv;
1692                 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes),
1693                             datumtype, needs_swap);
1694 		if (!checksum)
1695 		    mPUSHn(anv.nv);
1696 		else
1697 		    cdouble += anv.nv;
1698 	    }
1699 	    break;
1700 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1701 	case 'D':
1702 	    while (len-- > 0) {
1703 		ld_bytes aldouble;
1704                 SHIFT_BYTES(utf8, s, strend, aldouble.bytes,
1705                             sizeof(aldouble.bytes), datumtype, needs_swap);
1706                 /* The most common long double format, the x86 80-bit
1707                  * extended precision, has either 2 or 6 unused bytes,
1708                  * which may contain garbage, which may contain
1709                  * unintentional data.  While we do zero the bytes of
1710                  * the long double data in pack(), here in unpack() we
1711                  * don't, because it's really hard to envision that
1712                  * reading the long double off aldouble would be
1713                  * affected by the unused bytes.
1714                  *
1715                  * Note that trying to unpack 'long doubles' of 'long
1716                  * doubles' packed in another system is in the general
1717                  * case doomed without having more detail. */
1718 		if (!checksum)
1719 		    mPUSHn(aldouble.ld);
1720 		else
1721 		    cdouble += aldouble.ld;
1722 	    }
1723 	    break;
1724 #endif
1725 	case 'u':
1726 	    if (!checksum) {
1727                 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
1728 		sv = sv_2mortal(newSV(l));
1729 		if (l) {
1730                     SvPOK_on(sv);
1731                     *SvEND(sv) = '\0';
1732                 }
1733 	    }
1734 
1735             /* Note that all legal uuencoded strings are ASCII printables, so
1736              * have the same representation under UTF-8 vs not.  This means we
1737              * can ignore UTF8ness on legal input.  For illegal we stop at the
1738              * first failure, and don't report where/what that is, so again we
1739              * can ignore UTF8ness */
1740 
1741             while (s < strend && *s != ' ' && ISUUCHAR(*s)) {
1742                 I32 a, b, c, d;
1743                 char hunk[3];
1744 
1745                 len = PL_uudmap[*(U8*)s++] & 077;
1746                 while (len > 0) {
1747                     if (s < strend && ISUUCHAR(*s))
1748                         a = PL_uudmap[*(U8*)s++] & 077;
1749                     else
1750                         a = 0;
1751                     if (s < strend && ISUUCHAR(*s))
1752                         b = PL_uudmap[*(U8*)s++] & 077;
1753                     else
1754                         b = 0;
1755                     if (s < strend && ISUUCHAR(*s))
1756                         c = PL_uudmap[*(U8*)s++] & 077;
1757                     else
1758                         c = 0;
1759                     if (s < strend && ISUUCHAR(*s))
1760                         d = PL_uudmap[*(U8*)s++] & 077;
1761                     else
1762                         d = 0;
1763                     hunk[0] = (char)((a << 2) | (b >> 4));
1764                     hunk[1] = (char)((b << 4) | (c >> 2));
1765                     hunk[2] = (char)((c << 6) | d);
1766                     if (!checksum)
1767                         sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1768                     len -= 3;
1769                 }
1770                 if (*s == '\n')
1771                     s++;
1772                 else	/* possible checksum byte */
1773                     if (s + 1 < strend && s[1] == '\n')
1774                         s += 2;
1775             }
1776 	    if (!checksum)
1777 		XPUSHs(sv);
1778 	    break;
1779 	} /* End of switch */
1780 
1781 	if (checksum) {
1782 	    if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
1783 	      (checksum > bits_in_uv &&
1784 	       strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1785 		NV trouble, anv;
1786 
1787                 anv = (NV) (1 << (checksum & 15));
1788 		while (checksum >= 16) {
1789 		    checksum -= 16;
1790 		    anv *= 65536.0;
1791 		}
1792 		while (cdouble < 0.0)
1793 		    cdouble += anv;
1794 		cdouble = Perl_modf(cdouble / anv, &trouble);
1795 #ifdef LONGDOUBLE_DOUBLEDOUBLE
1796                 /* Workaround for powerpc doubledouble modfl bug:
1797                  * close to 1.0L and -1.0L cdouble is 0, and trouble
1798                  * is cdouble / anv. */
1799                 if (trouble != Perl_ceil(trouble)) {
1800                   cdouble = trouble;
1801                   if (cdouble >  1.0L) cdouble -= 1.0L;
1802                   if (cdouble < -1.0L) cdouble += 1.0L;
1803                 }
1804 #endif
1805                 cdouble *= anv;
1806 		sv = newSVnv(cdouble);
1807 	    }
1808 	    else {
1809 		if (checksum < bits_in_uv) {
1810 		    UV mask = ((UV)1 << checksum) - 1;
1811 		    cuv &= mask;
1812 		}
1813 		sv = newSVuv(cuv);
1814 	    }
1815 	    mXPUSHs(sv);
1816 	    checksum = 0;
1817 	}
1818 
1819         if (symptr->flags & FLAG_SLASH){
1820             if (SP - PL_stack_base - start_sp_offset <= 0)
1821 		break;
1822             if( next_symbol(symptr) ){
1823               if( symptr->howlen == e_number )
1824 		Perl_croak(aTHX_ "Count after length/code in unpack" );
1825               if( beyond ){
1826          	/* ...end of char buffer then no decent length available */
1827 		Perl_croak(aTHX_ "length/code after end of string in unpack" );
1828               } else {
1829          	/* take top of stack (hope it's numeric) */
1830                 len = POPi;
1831                 if( len < 0 )
1832                     Perl_croak(aTHX_ "Negative '/' count in unpack" );
1833               }
1834             } else {
1835 		Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1836             }
1837             datumtype = symptr->code;
1838             explicit_length = FALSE;
1839 	    goto redo_switch;
1840         }
1841     }
1842 
1843     if (new_s)
1844 	*new_s = s;
1845     PUTBACK;
1846     return SP - PL_stack_base - start_sp_offset;
1847 }
1848 
PP(pp_unpack)1849 PP(pp_unpack)
1850 {
1851     dSP;
1852     dPOPPOPssrl;
1853     U8 gimme = GIMME_V;
1854     STRLEN llen;
1855     STRLEN rlen;
1856     const char *pat = SvPV_const(left,  llen);
1857     const char *s   = SvPV_const(right, rlen);
1858     const char *strend = s + rlen;
1859     const char *patend = pat + llen;
1860     SSize_t cnt;
1861 
1862     PUTBACK;
1863     cnt = unpackstring(pat, patend, s, strend,
1864 		     ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
1865 		     | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
1866 
1867     SPAGAIN;
1868     if ( !cnt && gimme == G_SCALAR )
1869        PUSHs(&PL_sv_undef);
1870     RETURN;
1871 }
1872 
1873 STATIC U8 *
doencodes(U8 * h,const U8 * s,SSize_t len)1874 doencodes(U8 *h, const U8 *s, SSize_t len)
1875 {
1876     *h++ = PL_uuemap[len];
1877     while (len > 2) {
1878 	*h++ = PL_uuemap[(077 & (s[0] >> 2))];
1879 	*h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
1880 	*h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1881 	*h++ = PL_uuemap[(077 & (s[2] & 077))];
1882 	s += 3;
1883 	len -= 3;
1884     }
1885     if (len > 0) {
1886         const U8 r = (len > 1 ? s[1] : '\0');
1887 	*h++ = PL_uuemap[(077 & (s[0] >> 2))];
1888 	*h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
1889 	*h++ = PL_uuemap[(077 & ((r << 2) & 074))];
1890 	*h++ = PL_uuemap[0];
1891     }
1892     *h++ = '\n';
1893     return h;
1894 }
1895 
1896 STATIC SV *
S_is_an_int(pTHX_ const char * s,STRLEN l)1897 S_is_an_int(pTHX_ const char *s, STRLEN l)
1898 {
1899   SV *result = newSVpvn(s, l);
1900   char *const result_c = SvPV_nolen(result);	/* convenience */
1901   char *out = result_c;
1902   bool skip = 1;
1903   bool ignore = 0;
1904 
1905   PERL_ARGS_ASSERT_IS_AN_INT;
1906 
1907   while (*s) {
1908     switch (*s) {
1909     case ' ':
1910       break;
1911     case '+':
1912       if (!skip) {
1913 	SvREFCNT_dec(result);
1914 	return (NULL);
1915       }
1916       break;
1917     case '0':
1918     case '1':
1919     case '2':
1920     case '3':
1921     case '4':
1922     case '5':
1923     case '6':
1924     case '7':
1925     case '8':
1926     case '9':
1927       skip = 0;
1928       if (!ignore) {
1929 	*(out++) = *s;
1930       }
1931       break;
1932     case '.':
1933       ignore = 1;
1934       break;
1935     default:
1936       SvREFCNT_dec(result);
1937       return (NULL);
1938     }
1939     s++;
1940   }
1941   *(out++) = '\0';
1942   SvCUR_set(result, out - result_c);
1943   return (result);
1944 }
1945 
1946 /* pnum must be '\0' terminated */
1947 STATIC int
S_div128(pTHX_ SV * pnum,bool * done)1948 S_div128(pTHX_ SV *pnum, bool *done)
1949 {
1950     STRLEN len;
1951     char * const s = SvPV(pnum, len);
1952     char *t = s;
1953     int m = 0;
1954 
1955     PERL_ARGS_ASSERT_DIV128;
1956 
1957     *done = 1;
1958     while (*t) {
1959 	const int i = m * 10 + (*t - '0');
1960 	const int r = (i >> 7); /* r < 10 */
1961 	m = i & 0x7F;
1962 	if (r) {
1963 	    *done = 0;
1964 	}
1965 	*(t++) = '0' + r;
1966     }
1967     *(t++) = '\0';
1968     SvCUR_set(pnum, (STRLEN) (t - s));
1969     return (m);
1970 }
1971 
1972 /*
1973 =for apidoc packlist
1974 
1975 The engine implementing C<pack()> Perl function.
1976 
1977 =cut
1978 */
1979 
1980 void
Perl_packlist(pTHX_ SV * cat,const char * pat,const char * patend,SV ** beglist,SV ** endlist)1981 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
1982 {
1983     tempsym_t sym;
1984 
1985     PERL_ARGS_ASSERT_PACKLIST;
1986 
1987     TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
1988 
1989     /* We're going to do changes through SvPVX(cat). Make sure it's valid.
1990        Also make sure any UTF8 flag is loaded */
1991     SvPV_force_nolen(cat);
1992     if (DO_UTF8(cat))
1993 	sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
1994 
1995     (void)pack_rec( cat, &sym, beglist, endlist );
1996 }
1997 
1998 /* like sv_utf8_upgrade, but also repoint the group start markers */
1999 STATIC void
marked_upgrade(pTHX_ SV * sv,tempsym_t * sym_ptr)2000 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2001     STRLEN len;
2002     tempsym_t *group;
2003     const char *from_ptr, *from_start, *from_end, **marks, **m;
2004     char *to_start, *to_ptr;
2005 
2006     if (SvUTF8(sv)) return;
2007 
2008     from_start = SvPVX_const(sv);
2009     from_end = from_start + SvCUR(sv);
2010     for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2011 	if (!NATIVE_BYTE_IS_INVARIANT(*from_ptr)) break;
2012     if (from_ptr == from_end) {
2013 	/* Simple case: no character needs to be changed */
2014 	SvUTF8_on(sv);
2015 	return;
2016     }
2017 
2018     len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2019     Newx(to_start, len, char);
2020     Copy(from_start, to_start, from_ptr-from_start, char);
2021     to_ptr = to_start + (from_ptr-from_start);
2022 
2023     Newx(marks, sym_ptr->level+2, const char *);
2024     for (group=sym_ptr; group; group = group->previous)
2025 	marks[group->level] = from_start + group->strbeg;
2026     marks[sym_ptr->level+1] = from_end+1;
2027     for (m = marks; *m < from_ptr; m++)
2028 	*m = to_start + (*m-from_start);
2029 
2030     for (;from_ptr < from_end; from_ptr++) {
2031 	while (*m == from_ptr) *m++ = to_ptr;
2032 	to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2033     }
2034     *to_ptr = 0;
2035 
2036     while (*m == from_ptr) *m++ = to_ptr;
2037     if (m != marks + sym_ptr->level+1) {
2038 	Safefree(marks);
2039 	Safefree(to_start);
2040 	Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2041 		   "level=%d", m, marks, sym_ptr->level);
2042     }
2043     for (group=sym_ptr; group; group = group->previous)
2044 	group->strbeg = marks[group->level] - to_start;
2045     Safefree(marks);
2046 
2047     if (SvOOK(sv)) {
2048 	if (SvIVX(sv)) {
2049 	    SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2050 	    from_start -= SvIVX(sv);
2051 	    SvIV_set(sv, 0);
2052 	}
2053 	SvFLAGS(sv) &= ~SVf_OOK;
2054     }
2055     if (SvLEN(sv) != 0)
2056 	Safefree(from_start);
2057     SvPV_set(sv, to_start);
2058     SvCUR_set(sv, to_ptr - to_start);
2059     SvLEN_set(sv, len);
2060     SvUTF8_on(sv);
2061 }
2062 
2063 /* Exponential string grower. Makes string extension effectively O(n)
2064    needed says how many extra bytes we need (not counting the final '\0')
2065    Only grows the string if there is an actual lack of space
2066 */
2067 STATIC char *
S_sv_exp_grow(pTHX_ SV * sv,STRLEN needed)2068 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2069     const STRLEN cur = SvCUR(sv);
2070     const STRLEN len = SvLEN(sv);
2071     STRLEN extend;
2072 
2073     PERL_ARGS_ASSERT_SV_EXP_GROW;
2074 
2075     if (len - cur > needed) return SvPVX(sv);
2076     extend = needed > len ? needed : len;
2077     return SvGROW(sv, len+extend+1);
2078 }
2079 
2080 static SV *
S_sv_check_infnan(pTHX_ SV * sv,I32 datumtype)2081 S_sv_check_infnan(pTHX_ SV *sv, I32 datumtype)
2082 {
2083     SvGETMAGIC(sv);
2084     if (UNLIKELY(SvAMAGIC(sv)))
2085 	sv = sv_2num(sv);
2086     if (UNLIKELY(isinfnansv(sv))) {
2087 	const I32 c = TYPE_NO_MODIFIERS(datumtype);
2088 	const NV nv = SvNV_nomg(sv);
2089 	if (c == 'w')
2090 	    Perl_croak(aTHX_ "Cannot compress %" NVgf " in pack", nv);
2091 	else
2092 	    Perl_croak(aTHX_ "Cannot pack %" NVgf " with '%c'", nv, (int) c);
2093     }
2094     return sv;
2095 }
2096 
2097 #define SvIV_no_inf(sv,d) \
2098 	((sv) = S_sv_check_infnan(aTHX_ sv,d), SvIV_nomg(sv))
2099 #define SvUV_no_inf(sv,d) \
2100 	((sv) = S_sv_check_infnan(aTHX_ sv,d), SvUV_nomg(sv))
2101 
2102 STATIC
2103 SV **
S_pack_rec(pTHX_ SV * cat,tempsym_t * symptr,SV ** beglist,SV ** endlist)2104 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2105 {
2106     tempsym_t lookahead;
2107     SSize_t items  = endlist - beglist;
2108     bool found = next_symbol(symptr);
2109     bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2110     bool warn_utf8 = ckWARN(WARN_UTF8);
2111     char* from;
2112 
2113     PERL_ARGS_ASSERT_PACK_REC;
2114 
2115     if (symptr->level == 0 && found && symptr->code == 'U') {
2116 	marked_upgrade(aTHX_ cat, symptr);
2117 	symptr->flags |= FLAG_DO_UTF8;
2118 	utf8 = 0;
2119     }
2120     symptr->strbeg = SvCUR(cat);
2121 
2122     while (found) {
2123 	SV *fromstr;
2124 	STRLEN fromlen;
2125 	SSize_t len;
2126 	SV *lengthcode = NULL;
2127         I32 datumtype = symptr->code;
2128         howlen_t howlen = symptr->howlen;
2129 	char *start = SvPVX(cat);
2130 	char *cur   = start + SvCUR(cat);
2131         bool needs_swap;
2132 
2133 #define NEXTFROM (lengthcode ? lengthcode : items > 0 ? (--items, *beglist++) : &PL_sv_no)
2134 #define PEEKFROM (lengthcode ? lengthcode : items > 0 ? *beglist : &PL_sv_no)
2135 
2136         switch (howlen) {
2137 	  case e_star:
2138 	    len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2139 		0 : items;
2140 	    break;
2141 	  default:
2142 	    /* e_no_len and e_number */
2143 	    len = symptr->length;
2144 	    break;
2145         }
2146 
2147 	if (len) {
2148 	    packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2149 
2150 	    if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2151 		/* We can process this letter. */
2152 		STRLEN size = props & PACK_SIZE_MASK;
2153 		GROWING2(utf8, cat, start, cur, size, (STRLEN)len);
2154 	    }
2155         }
2156 
2157         /* Look ahead for next symbol. Do we have code/code? */
2158         lookahead = *symptr;
2159         found = next_symbol(&lookahead);
2160 	if (symptr->flags & FLAG_SLASH) {
2161 	    IV count;
2162 	    if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2163 	    if (strchr("aAZ", lookahead.code)) {
2164 		if (lookahead.howlen == e_number) count = lookahead.length;
2165 		else {
2166 		    if (items > 0) {
2167 			count = sv_len_utf8(*beglist);
2168 		    }
2169 		    else count = 0;
2170 		    if (lookahead.code == 'Z') count++;
2171 		}
2172 	    } else {
2173 		if (lookahead.howlen == e_number && lookahead.length < items)
2174 		    count = lookahead.length;
2175 		else count = items;
2176 	    }
2177 	    lookahead.howlen = e_number;
2178 	    lookahead.length = count;
2179 	    lengthcode = sv_2mortal(newSViv(count));
2180 	}
2181 
2182         needs_swap = NEEDS_SWAP(datumtype);
2183 
2184 	/* Code inside the switch must take care to properly update
2185 	   cat (CUR length and '\0' termination) if it updated *cur and
2186 	   doesn't simply leave using break */
2187 	switch (TYPE_NO_ENDIANNESS(datumtype)) {
2188 	default:
2189 	    Perl_croak(aTHX_ "Invalid type '%c' in pack",
2190 		       (int) TYPE_NO_MODIFIERS(datumtype));
2191 	case '%':
2192 	    Perl_croak(aTHX_ "'%%' may not be used in pack");
2193 
2194 	case '.' | TYPE_IS_SHRIEKING:
2195 	case '.':
2196 	    if (howlen == e_star) from = start;
2197 	    else if (len == 0) from = cur;
2198 	    else {
2199 		tempsym_t *group = symptr;
2200 
2201 		while (--len && group) group = group->previous;
2202 		from = group ? start + group->strbeg : start;
2203 	    }
2204 	    fromstr = NEXTFROM;
2205 	    len = SvIV_no_inf(fromstr, datumtype);
2206 	    goto resize;
2207 	case '@' | TYPE_IS_SHRIEKING:
2208 	case '@':
2209 	    from = start + symptr->strbeg;
2210 	  resize:
2211 	    if (utf8  && !(datumtype & TYPE_IS_SHRIEKING))
2212 		if (len >= 0) {
2213 		    while (len && from < cur) {
2214 			from += UTF8SKIP(from);
2215 			len--;
2216 		    }
2217 		    if (from > cur)
2218 			Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2219 		    if (len) {
2220 			/* Here we know from == cur */
2221 		      grow:
2222 			GROWING(0, cat, start, cur, len);
2223 			Zero(cur, len, char);
2224 			cur += len;
2225 		    } else if (from < cur) {
2226 			len = cur - from;
2227 			goto shrink;
2228 		    } else goto no_change;
2229 		} else {
2230 		    cur = from;
2231 		    len = -len;
2232 		    goto utf8_shrink;
2233 		}
2234 	    else {
2235 		len -= cur - from;
2236 		if (len > 0) goto grow;
2237 		if (len == 0) goto no_change;
2238 		len = -len;
2239 		goto shrink;
2240 	    }
2241 	    break;
2242 
2243 	case '(': {
2244             tempsym_t savsym = *symptr;
2245 	    U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2246 	    symptr->flags |= group_modifiers;
2247             symptr->patend = savsym.grpend;
2248             symptr->level++;
2249 	    symptr->previous = &lookahead;
2250 	    while (len--) {
2251 		U32 was_utf8;
2252 		if (utf8) symptr->flags |=  FLAG_PARSE_UTF8;
2253 		else      symptr->flags &= ~FLAG_PARSE_UTF8;
2254 		was_utf8 = SvUTF8(cat);
2255   	        symptr->patptr = savsym.grpbeg;
2256 		beglist = pack_rec(cat, symptr, beglist, endlist);
2257 		if (SvUTF8(cat) != was_utf8)
2258 		    /* This had better be an upgrade while in utf8==0 mode */
2259 		    utf8 = 1;
2260 
2261 		if (savsym.howlen == e_star && beglist == endlist)
2262 		    break;		/* No way to continue */
2263 	    }
2264 	    items = endlist - beglist;
2265 	    lookahead.flags  = symptr->flags & ~group_modifiers;
2266 	    goto no_change;
2267 	}
2268 	case 'X' | TYPE_IS_SHRIEKING:
2269 	    if (!len)			/* Avoid division by 0 */
2270 		len = 1;
2271 	    if (utf8) {
2272 		char *hop, *last;
2273 		SSize_t l = len;
2274 		hop = last = start;
2275 		while (hop < cur) {
2276 		    hop += UTF8SKIP(hop);
2277 		    if (--l == 0) {
2278 			last = hop;
2279 			l = len;
2280 		    }
2281 		}
2282 		if (last > cur)
2283 		    Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2284 		cur = last;
2285 		break;
2286 	    }
2287 	    len = (cur-start) % len;
2288 	    /* FALLTHROUGH */
2289 	case 'X':
2290 	    if (utf8) {
2291 		if (len < 1) goto no_change;
2292 	      utf8_shrink:
2293 		while (len > 0) {
2294 		    if (cur <= start)
2295 			Perl_croak(aTHX_ "'%c' outside of string in pack",
2296 				   (int) TYPE_NO_MODIFIERS(datumtype));
2297 		    while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2298 			if (cur <= start)
2299 			    Perl_croak(aTHX_ "'%c' outside of string in pack",
2300 				       (int) TYPE_NO_MODIFIERS(datumtype));
2301 		    }
2302 		    len--;
2303 		}
2304 	    } else {
2305 	      shrink:
2306 		if (cur - start < len)
2307 		    Perl_croak(aTHX_ "'%c' outside of string in pack",
2308 			       (int) TYPE_NO_MODIFIERS(datumtype));
2309 		cur -= len;
2310 	    }
2311 	    if (cur < start+symptr->strbeg) {
2312 		/* Make sure group starts don't point into the void */
2313 		tempsym_t *group;
2314 		const STRLEN length = cur-start;
2315 		for (group = symptr;
2316 		     group && length < group->strbeg;
2317 		     group = group->previous) group->strbeg = length;
2318 		lookahead.strbeg = length;
2319 	    }
2320 	    break;
2321 	case 'x' | TYPE_IS_SHRIEKING: {
2322 	    SSize_t ai32;
2323 	    if (!len)			/* Avoid division by 0 */
2324 		len = 1;
2325 	    if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2326 	    else      ai32 = (cur - start) % len;
2327 	    if (ai32 == 0) goto no_change;
2328 	    len -= ai32;
2329 	}
2330 	/* FALLTHROUGH */
2331 	case 'x':
2332 	    goto grow;
2333 	case 'A':
2334 	case 'Z':
2335 	case 'a': {
2336 	    const char *aptr;
2337 
2338 	    fromstr = NEXTFROM;
2339 	    aptr = SvPV_const(fromstr, fromlen);
2340 	    if (DO_UTF8(fromstr)) {
2341                 const char *end, *s;
2342 
2343 		if (!utf8 && !SvUTF8(cat)) {
2344 		    marked_upgrade(aTHX_ cat, symptr);
2345 		    lookahead.flags |= FLAG_DO_UTF8;
2346 		    lookahead.strbeg = symptr->strbeg;
2347 		    utf8 = 1;
2348 		    start = SvPVX(cat);
2349 		    cur = start + SvCUR(cat);
2350 		}
2351 		if (howlen == e_star) {
2352 		    if (utf8) goto string_copy;
2353 		    len = fromlen+1;
2354 		}
2355 		s = aptr;
2356 		end = aptr + fromlen;
2357 		fromlen = datumtype == 'Z' ? len-1 : len;
2358 		while ((SSize_t) fromlen > 0 && s < end) {
2359 		    s += UTF8SKIP(s);
2360 		    fromlen--;
2361 		}
2362 		if (s > end)
2363 		    Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2364 		if (utf8) {
2365 		    len = fromlen;
2366 		    if (datumtype == 'Z') len++;
2367 		    fromlen = s-aptr;
2368 		    len += fromlen;
2369 
2370 		    goto string_copy;
2371 		}
2372 		fromlen = len - fromlen;
2373 		if (datumtype == 'Z') fromlen--;
2374 		if (howlen == e_star) {
2375 		    len = fromlen;
2376 		    if (datumtype == 'Z') len++;
2377 		}
2378 		GROWING(0, cat, start, cur, len);
2379 		if (!S_utf8_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2380 				  datumtype | TYPE_IS_PACK))
2381 		    Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2382 			       "for '%c', aptr=%p end=%p cur=%p, fromlen=%zu",
2383 			       (int)datumtype, aptr, end, cur, fromlen);
2384 		cur += fromlen;
2385 		len -= fromlen;
2386 	    } else if (utf8) {
2387 		if (howlen == e_star) {
2388 		    len = fromlen;
2389 		    if (datumtype == 'Z') len++;
2390 		}
2391 		if (len <= (SSize_t) fromlen) {
2392 		    fromlen = len;
2393 		    if (datumtype == 'Z' && fromlen > 0) fromlen--;
2394 		}
2395 		/* assumes a byte expands to at most UTF8_EXPAND bytes on
2396 		   upgrade, so:
2397 		   expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2398 		GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2399 		len -= fromlen;
2400 		while (fromlen > 0) {
2401 		    cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2402 		    aptr++;
2403 		    fromlen--;
2404 		}
2405 	    } else {
2406 	      string_copy:
2407 		if (howlen == e_star) {
2408 		    len = fromlen;
2409 		    if (datumtype == 'Z') len++;
2410 		}
2411 		if (len <= (SSize_t) fromlen) {
2412 		    fromlen = len;
2413 		    if (datumtype == 'Z' && fromlen > 0) fromlen--;
2414 		}
2415 		GROWING(0, cat, start, cur, len);
2416 		Copy(aptr, cur, fromlen, char);
2417 		cur += fromlen;
2418 		len -= fromlen;
2419 	    }
2420 	    memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2421 	    cur += len;
2422 	    SvTAINT(cat);
2423 	    break;
2424 	}
2425 	case 'B':
2426 	case 'b': {
2427 	    const char *str, *end;
2428 	    SSize_t l, field_len;
2429 	    U8 bits;
2430 	    bool utf8_source;
2431 	    U32 utf8_flags;
2432 
2433 	    fromstr = NEXTFROM;
2434 	    str = SvPV_const(fromstr, fromlen);
2435 	    end = str + fromlen;
2436 	    if (DO_UTF8(fromstr)) {
2437 		utf8_source = TRUE;
2438 		utf8_flags  = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2439 	    } else {
2440 		utf8_source = FALSE;
2441 		utf8_flags  = 0; /* Unused, but keep compilers happy */
2442 	    }
2443 	    if (howlen == e_star) len = fromlen;
2444 	    field_len = (len+7)/8;
2445 	    GROWING(utf8, cat, start, cur, field_len);
2446 	    if (len > (SSize_t)fromlen) len = fromlen;
2447 	    bits = 0;
2448 	    l = 0;
2449 	    if (datumtype == 'B')
2450 		while (l++ < len) {
2451 		    if (utf8_source) {
2452 			UV val = 0;
2453 			NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2454 			bits |= val & 1;
2455 		    } else bits |= *str++ & 1;
2456 		    if (l & 7) bits <<= 1;
2457 		    else {
2458 			PUSH_BYTE(utf8, cur, bits);
2459 			bits = 0;
2460 		    }
2461 		}
2462 	    else
2463 		/* datumtype == 'b' */
2464 		while (l++ < len) {
2465 		    if (utf8_source) {
2466 			UV val = 0;
2467 			NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2468 			if (val & 1) bits |= 0x80;
2469 		    } else if (*str++ & 1)
2470 			bits |= 0x80;
2471 		    if (l & 7) bits >>= 1;
2472 		    else {
2473 			PUSH_BYTE(utf8, cur, bits);
2474 			bits = 0;
2475 		    }
2476 		}
2477 	    l--;
2478 	    if (l & 7) {
2479 		if (datumtype == 'B')
2480 		    bits <<= 7 - (l & 7);
2481 		else
2482 		    bits >>= 7 - (l & 7);
2483 		PUSH_BYTE(utf8, cur, bits);
2484 		l += 7;
2485 	    }
2486 	    /* Determine how many chars are left in the requested field */
2487 	    l /= 8;
2488 	    if (howlen == e_star) field_len = 0;
2489 	    else field_len -= l;
2490 	    Zero(cur, field_len, char);
2491 	    cur += field_len;
2492 	    break;
2493 	}
2494 	case 'H':
2495 	case 'h': {
2496 	    const char *str, *end;
2497 	    SSize_t l, field_len;
2498 	    U8 bits;
2499 	    bool utf8_source;
2500 	    U32 utf8_flags;
2501 
2502 	    fromstr = NEXTFROM;
2503 	    str = SvPV_const(fromstr, fromlen);
2504 	    end = str + fromlen;
2505 	    if (DO_UTF8(fromstr)) {
2506 		utf8_source = TRUE;
2507 		utf8_flags  = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2508 	    } else {
2509 		utf8_source = FALSE;
2510 		utf8_flags  = 0; /* Unused, but keep compilers happy */
2511 	    }
2512 	    if (howlen == e_star) len = fromlen;
2513 	    field_len = (len+1)/2;
2514 	    GROWING(utf8, cat, start, cur, field_len);
2515 	    if (!utf8_source && len > (SSize_t)fromlen) len = fromlen;
2516 	    bits = 0;
2517 	    l = 0;
2518 	    if (datumtype == 'H')
2519 		while (l++ < len) {
2520 		    if (utf8_source) {
2521 			UV val = 0;
2522 			NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2523 			if (val < 256 && isALPHA(val))
2524 			    bits |= (val + 9) & 0xf;
2525 			else
2526 			    bits |= val & 0xf;
2527 		    } else if (isALPHA(*str))
2528 			bits |= (*str++ + 9) & 0xf;
2529 		    else
2530 			bits |= *str++ & 0xf;
2531 		    if (l & 1) bits <<= 4;
2532 		    else {
2533 			PUSH_BYTE(utf8, cur, bits);
2534 			bits = 0;
2535 		    }
2536 		}
2537 	    else
2538 		while (l++ < len) {
2539 		    if (utf8_source) {
2540 			UV val = 0;
2541 			NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2542 			if (val < 256 && isALPHA(val))
2543 			    bits |= ((val + 9) & 0xf) << 4;
2544 			else
2545 			    bits |= (val & 0xf) << 4;
2546 		    } else if (isALPHA(*str))
2547 			bits |= ((*str++ + 9) & 0xf) << 4;
2548 		    else
2549 			bits |= (*str++ & 0xf) << 4;
2550 		    if (l & 1) bits >>= 4;
2551 		    else {
2552 			PUSH_BYTE(utf8, cur, bits);
2553 			bits = 0;
2554 		    }
2555 		}
2556 	    l--;
2557 	    if (l & 1) {
2558 		PUSH_BYTE(utf8, cur, bits);
2559 		l++;
2560 	    }
2561 	    /* Determine how many chars are left in the requested field */
2562 	    l /= 2;
2563 	    if (howlen == e_star) field_len = 0;
2564 	    else field_len -= l;
2565 	    Zero(cur, field_len, char);
2566 	    cur += field_len;
2567 	    break;
2568 	}
2569 	case 'c':
2570 	    while (len-- > 0) {
2571 		IV aiv;
2572 		fromstr = NEXTFROM;
2573                 aiv = SvIV_no_inf(fromstr, datumtype);
2574 		if ((-128 > aiv || aiv > 127))
2575 		    Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2576 				   "Character in 'c' format wrapped in pack");
2577 		PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2578 	    }
2579 	    break;
2580 	case 'C':
2581 	    if (len == 0) {
2582 		utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2583 		break;
2584 	    }
2585 	    while (len-- > 0) {
2586 		IV aiv;
2587 		fromstr = NEXTFROM;
2588                 aiv = SvIV_no_inf(fromstr, datumtype);
2589 		if ((0 > aiv || aiv > 0xff))
2590 		    Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2591 				   "Character in 'C' format wrapped in pack");
2592 		PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2593 	    }
2594 	    break;
2595 	case 'W': {
2596 	    char *end;
2597 	    U8 in_bytes = (U8)IN_BYTES;
2598 
2599 	    end = start+SvLEN(cat)-1;
2600 	    if (utf8) end -= UTF8_MAXLEN-1;
2601 	    while (len-- > 0) {
2602 		UV auv;
2603 		fromstr = NEXTFROM;
2604 		auv = SvUV_no_inf(fromstr, datumtype);
2605 		if (in_bytes) auv = auv % 0x100;
2606 		if (utf8) {
2607 		  W_utf8:
2608 		    if (cur >= end) {
2609 			*cur = '\0';
2610 			SvCUR_set(cat, cur - start);
2611 
2612 			GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2613 			end = start+SvLEN(cat)-UTF8_MAXLEN;
2614 		    }
2615 		    cur = (char *) uvchr_to_utf8_flags((U8 *) cur, auv, 0);
2616 		} else {
2617 		    if (auv >= 0x100) {
2618 			if (!SvUTF8(cat)) {
2619 			    *cur = '\0';
2620 			    SvCUR_set(cat, cur - start);
2621 			    marked_upgrade(aTHX_ cat, symptr);
2622 			    lookahead.flags |= FLAG_DO_UTF8;
2623 			    lookahead.strbeg = symptr->strbeg;
2624 			    utf8 = 1;
2625 			    start = SvPVX(cat);
2626 			    cur = start + SvCUR(cat);
2627 			    end = start+SvLEN(cat)-UTF8_MAXLEN;
2628 			    goto W_utf8;
2629 			}
2630 			Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2631 				       "Character in 'W' format wrapped in pack");
2632 			auv &= 0xff;
2633 		    }
2634 		    if (cur >= end) {
2635 			*cur = '\0';
2636 			SvCUR_set(cat, cur - start);
2637 			GROWING(0, cat, start, cur, len+1);
2638 			end = start+SvLEN(cat)-1;
2639 		    }
2640 		    *(U8 *) cur++ = (U8)auv;
2641 		}
2642 	    }
2643 	    break;
2644 	}
2645 	case 'U': {
2646 	    char *end;
2647 
2648 	    if (len == 0) {
2649 		if (!(symptr->flags & FLAG_DO_UTF8)) {
2650 		    marked_upgrade(aTHX_ cat, symptr);
2651 		    lookahead.flags |= FLAG_DO_UTF8;
2652 		    lookahead.strbeg = symptr->strbeg;
2653 		}
2654 		utf8 = 0;
2655 		goto no_change;
2656 	    }
2657 
2658 	    end = start+SvLEN(cat);
2659 	    if (!utf8) end -= UTF8_MAXLEN;
2660 	    while (len-- > 0) {
2661 		UV auv;
2662 		fromstr = NEXTFROM;
2663 		auv = SvUV_no_inf(fromstr, datumtype);
2664 		if (utf8) {
2665 		    U8 buffer[UTF8_MAXLEN+1], *endb;
2666 		    endb = uvchr_to_utf8_flags(buffer, UNI_TO_NATIVE(auv), 0);
2667 		    if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2668 			*cur = '\0';
2669 			SvCUR_set(cat, cur - start);
2670 			GROWING(0, cat, start, cur,
2671 				len+(endb-buffer)*UTF8_EXPAND);
2672 			end = start+SvLEN(cat);
2673 		    }
2674                     cur = my_bytes_to_utf8(buffer, endb-buffer, cur, 0);
2675 		} else {
2676 		    if (cur >= end) {
2677 			*cur = '\0';
2678 			SvCUR_set(cat, cur - start);
2679 			GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2680 			end = start+SvLEN(cat)-UTF8_MAXLEN;
2681 		    }
2682 		    cur = (char *) uvchr_to_utf8_flags((U8 *) cur,
2683                                                        UNI_TO_NATIVE(auv),
2684 						       0);
2685 		}
2686 	    }
2687 	    break;
2688 	}
2689 	/* Float and double added by gnb@melba.bby.oz.au  22/11/89 */
2690 	case 'f':
2691 	    while (len-- > 0) {
2692 		float afloat;
2693 		NV anv;
2694 		fromstr = NEXTFROM;
2695 		anv = SvNV(fromstr);
2696 # if (defined(VMS) && !defined(_IEEE_FP)) || defined(DOUBLE_IS_VAX_FLOAT)
2697 		/* IEEE fp overflow shenanigans are unavailable on VAX and optional
2698 		 * on Alpha; fake it if we don't have them.
2699 		 */
2700 		if (anv > FLT_MAX)
2701 		    afloat = FLT_MAX;
2702 		else if (anv < -FLT_MAX)
2703 		    afloat = -FLT_MAX;
2704 		else afloat = (float)anv;
2705 # else
2706 #  if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2707 		if(Perl_isnan(anv))
2708 		    afloat = (float)NV_NAN;
2709 		else
2710 #  endif
2711 #  ifdef NV_INF
2712                 /* a simple cast to float is undefined if outside
2713                  * the range of values that can be represented */
2714 		afloat = (float)(anv >  FLT_MAX ?  NV_INF :
2715                                  anv < -FLT_MAX ? -NV_INF : anv);
2716 #  endif
2717 # endif
2718                 PUSH_VAR(utf8, cur, afloat, needs_swap);
2719 	    }
2720 	    break;
2721 	case 'd':
2722 	    while (len-- > 0) {
2723 		double adouble;
2724 		NV anv;
2725 		fromstr = NEXTFROM;
2726 		anv = SvNV(fromstr);
2727 # if (defined(VMS) && !defined(_IEEE_FP)) || defined(DOUBLE_IS_VAX_FLOAT)
2728 		/* IEEE fp overflow shenanigans are unavailable on VAX and optional
2729 		 * on Alpha; fake it if we don't have them.
2730 		 */
2731 		if (anv > DBL_MAX)
2732 		    adouble = DBL_MAX;
2733 		else if (anv < -DBL_MAX)
2734 		    adouble = -DBL_MAX;
2735 		else adouble = (double)anv;
2736 # else
2737 		adouble = (double)anv;
2738 # endif
2739                 PUSH_VAR(utf8, cur, adouble, needs_swap);
2740 	    }
2741 	    break;
2742 	case 'F': {
2743 	    NV_bytes anv;
2744 	    Zero(&anv, 1, NV); /* can be long double with unused bits */
2745 	    while (len-- > 0) {
2746 		fromstr = NEXTFROM;
2747 #ifdef __GNUC__
2748 		/* to work round a gcc/x86 bug; don't use SvNV */
2749 		anv.nv = sv_2nv(fromstr);
2750 #    if defined(LONGDOUBLE_X86_80_BIT) && defined(USE_LONG_DOUBLE) \
2751          && LONG_DOUBLESIZE > 10
2752                 /* GCC sometimes overwrites the padding in the
2753                    assignment above */
2754                 Zero(anv.bytes+10, sizeof(anv.bytes) - 10, U8);
2755 #    endif
2756 #else
2757 		anv.nv = SvNV(fromstr);
2758 #endif
2759                 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes), needs_swap);
2760 	    }
2761 	    break;
2762 	}
2763 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2764 	case 'D': {
2765 	    ld_bytes aldouble;
2766 	    /* long doubles can have unused bits, which may be nonzero */
2767 	    Zero(&aldouble, 1, long double);
2768 	    while (len-- > 0) {
2769 		fromstr = NEXTFROM;
2770 #  ifdef __GNUC__
2771 		/* to work round a gcc/x86 bug; don't use SvNV */
2772 		aldouble.ld = (long double)sv_2nv(fromstr);
2773 #    if defined(LONGDOUBLE_X86_80_BIT) && LONG_DOUBLESIZE > 10
2774                 /* GCC sometimes overwrites the padding in the
2775                    assignment above */
2776                 Zero(aldouble.bytes+10, sizeof(aldouble.bytes) - 10, U8);
2777 #    endif
2778 #  else
2779 		aldouble.ld = (long double)SvNV(fromstr);
2780 #  endif
2781                 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes),
2782                            needs_swap);
2783 	    }
2784 	    break;
2785 	}
2786 #endif
2787 	case 'n' | TYPE_IS_SHRIEKING:
2788 	case 'n':
2789 	    while (len-- > 0) {
2790 		I16 ai16;
2791 		fromstr = NEXTFROM;
2792 		ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
2793 		ai16 = PerlSock_htons(ai16);
2794                 PUSH16(utf8, cur, &ai16, FALSE);
2795 	    }
2796 	    break;
2797 	case 'v' | TYPE_IS_SHRIEKING:
2798 	case 'v':
2799 	    while (len-- > 0) {
2800 		I16 ai16;
2801 		fromstr = NEXTFROM;
2802 		ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
2803 		ai16 = htovs(ai16);
2804                 PUSH16(utf8, cur, &ai16, FALSE);
2805 	    }
2806 	    break;
2807         case 'S' | TYPE_IS_SHRIEKING:
2808 #if SHORTSIZE != SIZE16
2809 	    while (len-- > 0) {
2810 		unsigned short aushort;
2811 		fromstr = NEXTFROM;
2812 		aushort = SvUV_no_inf(fromstr, datumtype);
2813                 PUSH_VAR(utf8, cur, aushort, needs_swap);
2814 	    }
2815             break;
2816 #else
2817             /* FALLTHROUGH */
2818 #endif
2819 	case 'S':
2820 	    while (len-- > 0) {
2821 		U16 au16;
2822 		fromstr = NEXTFROM;
2823 		au16 = (U16)SvUV_no_inf(fromstr, datumtype);
2824                 PUSH16(utf8, cur, &au16, needs_swap);
2825 	    }
2826 	    break;
2827 	case 's' | TYPE_IS_SHRIEKING:
2828 #if SHORTSIZE != SIZE16
2829 	    while (len-- > 0) {
2830 		short ashort;
2831 		fromstr = NEXTFROM;
2832 		ashort = SvIV_no_inf(fromstr, datumtype);
2833                 PUSH_VAR(utf8, cur, ashort, needs_swap);
2834 	    }
2835             break;
2836 #else
2837             /* FALLTHROUGH */
2838 #endif
2839 	case 's':
2840 	    while (len-- > 0) {
2841 		I16 ai16;
2842 		fromstr = NEXTFROM;
2843 		ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
2844                 PUSH16(utf8, cur, &ai16, needs_swap);
2845 	    }
2846 	    break;
2847 	case 'I':
2848 	case 'I' | TYPE_IS_SHRIEKING:
2849 	    while (len-- > 0) {
2850 		unsigned int auint;
2851 		fromstr = NEXTFROM;
2852 		auint = SvUV_no_inf(fromstr, datumtype);
2853                 PUSH_VAR(utf8, cur, auint, needs_swap);
2854 	    }
2855 	    break;
2856 	case 'j':
2857 	    while (len-- > 0) {
2858 		IV aiv;
2859 		fromstr = NEXTFROM;
2860 		aiv = SvIV_no_inf(fromstr, datumtype);
2861                 PUSH_VAR(utf8, cur, aiv, needs_swap);
2862 	    }
2863 	    break;
2864 	case 'J':
2865 	    while (len-- > 0) {
2866 		UV auv;
2867 		fromstr = NEXTFROM;
2868 		auv = SvUV_no_inf(fromstr, datumtype);
2869                 PUSH_VAR(utf8, cur, auv, needs_swap);
2870 	    }
2871 	    break;
2872 	case 'w':
2873             while (len-- > 0) {
2874 		NV anv;
2875 		fromstr = NEXTFROM;
2876 		S_sv_check_infnan(aTHX_ fromstr, datumtype);
2877 		anv = SvNV_nomg(fromstr);
2878 
2879 		if (anv < 0) {
2880 		    *cur = '\0';
2881 		    SvCUR_set(cat, cur - start);
2882 		    Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
2883 		}
2884 
2885                 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2886                    which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2887                    any negative IVs will have already been got by the croak()
2888                    above. IOK is untrue for fractions, so we test them
2889                    against UV_MAX_P1.  */
2890 		if (SvIOK(fromstr) || anv < UV_MAX_P1) {
2891 		    char   buf[(sizeof(UV)*CHAR_BIT)/7+1];
2892 		    char  *in = buf + sizeof(buf);
2893 		    UV     auv = SvUV_nomg(fromstr);
2894 
2895 		    do {
2896 			*--in = (char)((auv & 0x7f) | 0x80);
2897 			auv >>= 7;
2898 		    } while (auv);
2899 		    buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2900 		    PUSH_GROWING_BYTES(utf8, cat, start, cur,
2901 				       in, (buf + sizeof(buf)) - in);
2902 		} else if (SvPOKp(fromstr))
2903 		    goto w_string;
2904 		else if (SvNOKp(fromstr)) {
2905 		    /* 10**NV_MAX_10_EXP is the largest power of 10
2906 		       so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
2907 		       given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
2908 		       x = (NV_MAX_10_EXP+1) * log (10) / log (128)
2909 		       And with that many bytes only Inf can overflow.
2910 		       Some C compilers are strict about integral constant
2911 		       expressions so we conservatively divide by a slightly
2912 		       smaller integer instead of multiplying by the exact
2913 		       floating-point value.
2914 		    */
2915 #ifdef NV_MAX_10_EXP
2916 		    /* char   buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
2917 		    char   buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
2918 #else
2919 		    /* char   buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
2920 		    char   buf[1 + (int)((308 + 1) / 2)]; /* valid C */
2921 #endif
2922 		    char  *in = buf + sizeof(buf);
2923 
2924 		    anv = Perl_floor(anv);
2925 		    do {
2926 			const NV next = Perl_floor(anv / 128);
2927 			if (in <= buf)  /* this cannot happen ;-) */
2928 			    Perl_croak(aTHX_ "Cannot compress integer in pack");
2929 			*--in = (unsigned char)(anv - (next * 128)) | 0x80;
2930 			anv = next;
2931 		    } while (anv > 0);
2932 		    buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2933 		    PUSH_GROWING_BYTES(utf8, cat, start, cur,
2934 				       in, (buf + sizeof(buf)) - in);
2935 		} else {
2936 		    const char     *from;
2937 		    char           *result, *in;
2938 		    SV             *norm;
2939 		    STRLEN          len;
2940 		    bool            done;
2941 
2942 		  w_string:
2943 		    /* Copy string and check for compliance */
2944 		    from = SvPV_nomg_const(fromstr, len);
2945 		    if ((norm = is_an_int(from, len)) == NULL)
2946 			Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
2947 
2948 		    Newx(result, len, char);
2949 		    in = result + len;
2950 		    done = FALSE;
2951 		    while (!done) *--in = div128(norm, &done) | 0x80;
2952 		    result[len - 1] &= 0x7F; /* clear continue bit */
2953 		    PUSH_GROWING_BYTES(utf8, cat, start, cur,
2954 				       in, (result + len) - in);
2955 		    Safefree(result);
2956 		    SvREFCNT_dec(norm);	/* free norm */
2957 		}
2958 	    }
2959             break;
2960 	case 'i':
2961 	case 'i' | TYPE_IS_SHRIEKING:
2962 	    while (len-- > 0) {
2963 		int aint;
2964 		fromstr = NEXTFROM;
2965 		aint = SvIV_no_inf(fromstr, datumtype);
2966                 PUSH_VAR(utf8, cur, aint, needs_swap);
2967 	    }
2968 	    break;
2969 	case 'N' | TYPE_IS_SHRIEKING:
2970 	case 'N':
2971 	    while (len-- > 0) {
2972 		U32 au32;
2973 		fromstr = NEXTFROM;
2974 		au32 = SvUV_no_inf(fromstr, datumtype);
2975 		au32 = PerlSock_htonl(au32);
2976                 PUSH32(utf8, cur, &au32, FALSE);
2977 	    }
2978 	    break;
2979 	case 'V' | TYPE_IS_SHRIEKING:
2980 	case 'V':
2981 	    while (len-- > 0) {
2982 		U32 au32;
2983 		fromstr = NEXTFROM;
2984 		au32 = SvUV_no_inf(fromstr, datumtype);
2985 		au32 = htovl(au32);
2986                 PUSH32(utf8, cur, &au32, FALSE);
2987 	    }
2988 	    break;
2989 	case 'L' | TYPE_IS_SHRIEKING:
2990 #if LONGSIZE != SIZE32
2991 	    while (len-- > 0) {
2992 		unsigned long aulong;
2993 		fromstr = NEXTFROM;
2994 		aulong = SvUV_no_inf(fromstr, datumtype);
2995                 PUSH_VAR(utf8, cur, aulong, needs_swap);
2996 	    }
2997 	    break;
2998 #else
2999             /* Fall though! */
3000 #endif
3001 	case 'L':
3002 	    while (len-- > 0) {
3003 		U32 au32;
3004 		fromstr = NEXTFROM;
3005 		au32 = SvUV_no_inf(fromstr, datumtype);
3006                 PUSH32(utf8, cur, &au32, needs_swap);
3007 	    }
3008 	    break;
3009 	case 'l' | TYPE_IS_SHRIEKING:
3010 #if LONGSIZE != SIZE32
3011 	    while (len-- > 0) {
3012 		long along;
3013 		fromstr = NEXTFROM;
3014 		along = SvIV_no_inf(fromstr, datumtype);
3015                 PUSH_VAR(utf8, cur, along, needs_swap);
3016 	    }
3017 	    break;
3018 #else
3019             /* Fall though! */
3020 #endif
3021 	case 'l':
3022             while (len-- > 0) {
3023 		I32 ai32;
3024 		fromstr = NEXTFROM;
3025 		ai32 = SvIV_no_inf(fromstr, datumtype);
3026                 PUSH32(utf8, cur, &ai32, needs_swap);
3027 	    }
3028 	    break;
3029 #if defined(HAS_QUAD) && IVSIZE >= 8
3030 	case 'Q':
3031 	    while (len-- > 0) {
3032 		Uquad_t auquad;
3033 		fromstr = NEXTFROM;
3034 		auquad = (Uquad_t) SvUV_no_inf(fromstr, datumtype);
3035                 PUSH_VAR(utf8, cur, auquad, needs_swap);
3036 	    }
3037 	    break;
3038 	case 'q':
3039 	    while (len-- > 0) {
3040 		Quad_t aquad;
3041 		fromstr = NEXTFROM;
3042 		aquad = (Quad_t)SvIV_no_inf(fromstr, datumtype);
3043                 PUSH_VAR(utf8, cur, aquad, needs_swap);
3044 	    }
3045 	    break;
3046 #endif
3047 	case 'P':
3048 	    len = 1;		/* assume SV is correct length */
3049 	    GROWING(utf8, cat, start, cur, sizeof(char *));
3050 	    /* FALLTHROUGH */
3051 	case 'p':
3052 	    while (len-- > 0) {
3053 		const char *aptr;
3054 
3055 		fromstr = NEXTFROM;
3056 		SvGETMAGIC(fromstr);
3057 		if (!SvOK(fromstr)) aptr = NULL;
3058 		else {
3059 		    /* XXX better yet, could spirit away the string to
3060 		     * a safe spot and hang on to it until the result
3061 		     * of pack() (and all copies of the result) are
3062 		     * gone.
3063 		     */
3064 		    if (((SvTEMP(fromstr) && SvREFCNT(fromstr) == 1)
3065 			 || (SvPADTMP(fromstr) &&
3066 			     !SvREADONLY(fromstr)))) {
3067 			Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3068 				       "Attempt to pack pointer to temporary value");
3069 		    }
3070 		    if (SvPOK(fromstr) || SvNIOK(fromstr))
3071 			aptr = SvPV_nomg_const_nolen(fromstr);
3072 		    else
3073 			aptr = SvPV_force_flags_nolen(fromstr, 0);
3074 		}
3075                 PUSH_VAR(utf8, cur, aptr, needs_swap);
3076 	    }
3077 	    break;
3078 	case 'u': {
3079 	    const char *aptr, *aend;
3080 	    bool from_utf8;
3081 
3082 	    fromstr = NEXTFROM;
3083 	    if (len <= 2) len = 45;
3084 	    else len = len / 3 * 3;
3085 	    if (len >= 64) {
3086 		Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3087 			       "Field too wide in 'u' format in pack");
3088 		len = 63;
3089 	    }
3090 	    aptr = SvPV_const(fromstr, fromlen);
3091 	    from_utf8 = DO_UTF8(fromstr);
3092 	    if (from_utf8) {
3093 		aend = aptr + fromlen;
3094 		fromlen = sv_len_utf8_nomg(fromstr);
3095 	    } else aend = NULL; /* Unused, but keep compilers happy */
3096 	    GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3097 	    while (fromlen > 0) {
3098 		U8 *end;
3099 		SSize_t todo;
3100 		U8 hunk[1+63/3*4+1];
3101 
3102 		if ((SSize_t)fromlen > len)
3103 		    todo = len;
3104 		else
3105 		    todo = fromlen;
3106 		if (from_utf8) {
3107 		    char buffer[64];
3108 		    if (!S_utf8_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3109 				      'u' | TYPE_IS_PACK)) {
3110 			*cur = '\0';
3111 			SvCUR_set(cat, cur - start);
3112 			Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3113 				   "aptr=%p, aend=%p, buffer=%p, todo=%zd",
3114 				   aptr, aend, buffer, todo);
3115 		    }
3116 		    end = doencodes(hunk, (const U8 *)buffer, todo);
3117 		} else {
3118 		    end = doencodes(hunk, (const U8 *)aptr, todo);
3119 		    aptr += todo;
3120 		}
3121 		PUSH_BYTES(utf8, cur, hunk, end-hunk, 0);
3122 		fromlen -= todo;
3123 	    }
3124 	    break;
3125 	}
3126 	}
3127 	*cur = '\0';
3128 	SvCUR_set(cat, cur - start);
3129       no_change:
3130 	*symptr = lookahead;
3131     }
3132     return beglist;
3133 }
3134 #undef NEXTFROM
3135 
3136 
PP(pp_pack)3137 PP(pp_pack)
3138 {
3139     dSP; dMARK; dORIGMARK; dTARGET;
3140     SV *cat = TARG;
3141     STRLEN fromlen;
3142     SV *pat_sv = *++MARK;
3143     const char *pat = SvPV_const(pat_sv, fromlen);
3144     const char *patend = pat + fromlen;
3145 
3146     MARK++;
3147     SvPVCLEAR(cat);
3148     SvUTF8_off(cat);
3149 
3150     packlist(cat, pat, patend, MARK, SP + 1);
3151 
3152     if (SvUTF8(cat)) {
3153         STRLEN result_len;
3154         const char * result = SvPV_nomg(cat, result_len);
3155         const U8 * error_pos;
3156 
3157         if (! is_utf8_string_loc((U8 *) result, result_len, &error_pos)) {
3158             _force_out_malformed_utf8_message(error_pos,
3159                                               (U8 *) result + result_len,
3160                                               0, /* no flags */
3161                                               1 /* Die */
3162                                             );
3163             NOT_REACHED; /* NOTREACHED */
3164         }
3165     }
3166 
3167     SvSETMAGIC(cat);
3168     SP = ORIGMARK;
3169     PUSHs(cat);
3170     RETURN;
3171 }
3172 
3173 /*
3174  * ex: set ts=8 sts=4 sw=4 et:
3175  */
3176