1 /* pp_pack.c 2 * 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others 5 * 6 * You may distribute under the terms of either the GNU General Public 7 * License or the Artistic License, as specified in the README file. 8 * 9 */ 10 11 /* 12 * He still hopefully carried some of his gear in his pack: a small tinder-box, 13 * two small shallow pans, the smaller fitting into the larger; inside them a 14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and 15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure, 16 * some salt. 17 * 18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"] 19 */ 20 21 /* This file contains pp ("push/pop") functions that 22 * execute the opcodes that make up a perl program. A typical pp function 23 * expects to find its arguments on the stack, and usually pushes its 24 * results onto the stack, hence the 'pp' terminology. Each OP structure 25 * contains a pointer to the relevant pp_foo() function. 26 * 27 * This particular file just contains pp_pack() and pp_unpack(). See the 28 * other pp*.c files for the rest of the pp_ functions. 29 */ 30 31 #include "EXTERN.h" 32 #define PERL_IN_PP_PACK_C 33 #include "perl.h" 34 35 /* Types used by pack/unpack */ 36 typedef enum { 37 e_no_len, /* no length */ 38 e_number, /* number, [] */ 39 e_star /* asterisk */ 40 } howlen_t; 41 42 typedef struct tempsym { 43 const char* patptr; /* current template char */ 44 const char* patend; /* one after last char */ 45 const char* grpbeg; /* 1st char of ()-group */ 46 const char* grpend; /* end of ()-group */ 47 I32 code; /* template code (!<>) */ 48 I32 length; /* length/repeat count */ 49 howlen_t howlen; /* how length is given */ 50 int level; /* () nesting level */ 51 U32 flags; /* /=4, comma=2, pack=1 */ 52 /* and group modifiers */ 53 STRLEN strbeg; /* offset of group start */ 54 struct tempsym *previous; /* previous group */ 55 } tempsym_t; 56 57 #define TEMPSYM_INIT(symptr, p, e, f) \ 58 STMT_START { \ 59 (symptr)->patptr = (p); \ 60 (symptr)->patend = (e); \ 61 (symptr)->grpbeg = NULL; \ 62 (symptr)->grpend = NULL; \ 63 (symptr)->grpend = NULL; \ 64 (symptr)->code = 0; \ 65 (symptr)->length = 0; \ 66 (symptr)->howlen = e_no_len; \ 67 (symptr)->level = 0; \ 68 (symptr)->flags = (f); \ 69 (symptr)->strbeg = 0; \ 70 (symptr)->previous = NULL; \ 71 } STMT_END 72 73 typedef union { 74 NV nv; 75 U8 bytes[sizeof(NV)]; 76 } NV_bytes; 77 78 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE) 79 typedef union { 80 long double ld; 81 U8 bytes[sizeof(long double)]; 82 } ld_bytes; 83 #endif 84 85 #ifndef CHAR_BIT 86 # define CHAR_BIT 8 87 #endif 88 /* Maximum number of bytes to which a byte can grow due to upgrade */ 89 #define UTF8_EXPAND 2 90 91 /* 92 * Offset for integer pack/unpack. 93 * 94 * On architectures where I16 and I32 aren't really 16 and 32 bits, 95 * which for now are all Crays, pack and unpack have to play games. 96 */ 97 98 /* 99 * These values are required for portability of pack() output. 100 * If they're not right on your machine, then pack() and unpack() 101 * wouldn't work right anyway; you'll need to apply the Cray hack. 102 * (I'd like to check them with #if, but you can't use sizeof() in 103 * the preprocessor.) --??? 104 */ 105 /* 106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE 107 defines are now in config.h. --Andy Dougherty April 1998 108 */ 109 #define SIZE16 2 110 #define SIZE32 4 111 112 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack(). 113 --jhi Feb 1999 */ 114 115 #if U16SIZE > SIZE16 || U32SIZE > SIZE32 116 # if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */ 117 # define OFF16(p) ((char*)(p)) 118 # define OFF32(p) ((char*)(p)) 119 # else 120 # if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */ 121 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16)) 122 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32)) 123 # else 124 ++++ bad cray byte order 125 # endif 126 # endif 127 #else 128 # define OFF16(p) ((char *) (p)) 129 # define OFF32(p) ((char *) (p)) 130 #endif 131 132 #define PUSH16(utf8, cur, p, needs_swap) \ 133 PUSH_BYTES(utf8, cur, OFF16(p), SIZE16, needs_swap) 134 #define PUSH32(utf8, cur, p, needs_swap) \ 135 PUSH_BYTES(utf8, cur, OFF32(p), SIZE32, needs_swap) 136 137 #if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */ 138 # define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_LITTLE_ENDIAN) 139 #elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */ 140 # define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_BIG_ENDIAN) 141 #else 142 # error "Unsupported byteorder" 143 /* Need to add code here to re-instate mixed endian support. 144 NEEDS_SWAP would need to hold a flag indicating which action to 145 take, and S_reverse_copy and the code in uni_to_bytes would need 146 logic adding to deal with any mixed-endian transformations needed. 147 */ 148 #endif 149 150 /* Only to be used inside a loop (see the break) */ 151 #define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype, needs_swap) \ 152 STMT_START { \ 153 if (UNLIKELY(utf8)) { \ 154 if (!uni_to_bytes(aTHX_ &s, strend, \ 155 (char *) (buf), len, datumtype)) break; \ 156 } else { \ 157 if (UNLIKELY(needs_swap)) \ 158 S_reverse_copy(s, (char *) (buf), len); \ 159 else \ 160 Copy(s, (char *) (buf), len, char); \ 161 s += len; \ 162 } \ 163 } STMT_END 164 165 #define SHIFT16(utf8, s, strend, p, datumtype, needs_swap) \ 166 SHIFT_BYTES(utf8, s, strend, OFF16(p), SIZE16, datumtype, needs_swap) 167 168 #define SHIFT32(utf8, s, strend, p, datumtype, needs_swap) \ 169 SHIFT_BYTES(utf8, s, strend, OFF32(p), SIZE32, datumtype, needs_swap) 170 171 #define SHIFT_VAR(utf8, s, strend, var, datumtype, needs_swap) \ 172 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype, needs_swap) 173 174 #define PUSH_VAR(utf8, aptr, var, needs_swap) \ 175 PUSH_BYTES(utf8, aptr, &(var), sizeof(var), needs_swap) 176 177 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */ 178 #define MAX_SUB_TEMPLATE_LEVEL 100 179 180 /* flags (note that type modifiers can also be used as flags!) */ 181 #define FLAG_WAS_UTF8 0x40 182 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */ 183 #define FLAG_UNPACK_ONLY_ONE 0x10 184 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */ 185 #define FLAG_SLASH 0x04 186 #define FLAG_COMMA 0x02 187 #define FLAG_PACK 0x01 188 189 STATIC SV * 190 S_mul128(pTHX_ SV *sv, U8 m) 191 { 192 STRLEN len; 193 char *s = SvPV(sv, len); 194 char *t; 195 196 PERL_ARGS_ASSERT_MUL128; 197 198 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */ 199 SV * const tmpNew = newSVpvs("0000000000"); 200 201 sv_catsv(tmpNew, sv); 202 SvREFCNT_dec(sv); /* free old sv */ 203 sv = tmpNew; 204 s = SvPV(sv, len); 205 } 206 t = s + len - 1; 207 while (!*t) /* trailing '\0'? */ 208 t--; 209 while (t > s) { 210 const U32 i = ((*t - '0') << 7) + m; 211 *(t--) = '0' + (char)(i % 10); 212 m = (char)(i / 10); 213 } 214 return (sv); 215 } 216 217 /* Explosives and implosives. */ 218 219 #if 'I' == 73 && 'J' == 74 220 /* On an ASCII/ISO kind of system */ 221 #define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a') 222 #else 223 /* 224 Some other sort of character set - use memchr() so we don't match 225 the null byte. 226 */ 227 #define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ') 228 #endif 229 230 /* type modifiers */ 231 #define TYPE_IS_SHRIEKING 0x100 232 #define TYPE_IS_BIG_ENDIAN 0x200 233 #define TYPE_IS_LITTLE_ENDIAN 0x400 234 #define TYPE_IS_PACK 0x800 235 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN) 236 #define TYPE_MODIFIERS(t) ((t) & ~0xFF) 237 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF) 238 239 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK) 240 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK) 241 242 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP(" 243 244 #define PACK_SIZE_CANNOT_CSUM 0x80 245 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */ 246 #define PACK_SIZE_MASK 0x3F 247 248 #include "packsizetables.c" 249 250 static void 251 S_reverse_copy(const char *src, char *dest, STRLEN len) 252 { 253 dest += len; 254 while (len--) 255 *--dest = *src++; 256 } 257 258 STATIC U8 259 uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype) 260 { 261 STRLEN retlen; 262 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, 263 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY); 264 /* We try to process malformed UTF-8 as much as possible (preferably with 265 warnings), but these two mean we make no progress in the string and 266 might enter an infinite loop */ 267 if (retlen == (STRLEN) -1 || retlen == 0) 268 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack", 269 (int) TYPE_NO_MODIFIERS(datumtype)); 270 if (val >= 0x100) { 271 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK), 272 "Character in '%c' format wrapped in unpack", 273 (int) TYPE_NO_MODIFIERS(datumtype)); 274 val &= 0xff; 275 } 276 *s += retlen; 277 return (U8)val; 278 } 279 280 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \ 281 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \ 282 *(U8 *)(s)++) 283 284 STATIC bool 285 uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype) 286 { 287 UV val; 288 STRLEN retlen; 289 const char *from = *s; 290 int bad = 0; 291 const U32 flags = ckWARN(WARN_UTF8) ? 292 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY); 293 const bool needs_swap = NEEDS_SWAP(datumtype); 294 295 if (UNLIKELY(needs_swap)) 296 buf += buf_len; 297 298 for (;buf_len > 0; buf_len--) { 299 if (from >= end) return FALSE; 300 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags); 301 if (retlen == (STRLEN) -1 || retlen == 0) { 302 from += UTF8SKIP(from); 303 bad |= 1; 304 } else from += retlen; 305 if (val >= 0x100) { 306 bad |= 2; 307 val &= 0xff; 308 } 309 if (UNLIKELY(needs_swap)) 310 *(U8 *)--buf = (U8)val; 311 else 312 *(U8 *)buf++ = (U8)val; 313 } 314 /* We have enough characters for the buffer. Did we have problems ? */ 315 if (bad) { 316 if (bad & 1) { 317 /* Rewalk the string fragment while warning */ 318 const char *ptr; 319 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY; 320 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) { 321 if (ptr >= end) break; 322 utf8n_to_uvchr((U8 *) ptr, end-ptr, &retlen, flags); 323 } 324 if (from > end) from = end; 325 } 326 if ((bad & 2)) 327 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ? 328 WARN_PACK : WARN_UNPACK), 329 "Character(s) in '%c' format wrapped in %s", 330 (int) TYPE_NO_MODIFIERS(datumtype), 331 datumtype & TYPE_IS_PACK ? "pack" : "unpack"); 332 } 333 *s = from; 334 return TRUE; 335 } 336 337 STATIC bool 338 next_uni_uu(pTHX_ const char **s, const char *end, I32 *out) 339 { 340 dVAR; 341 STRLEN retlen; 342 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY); 343 if (val >= 0x100 || !ISUUCHAR(val) || 344 retlen == (STRLEN) -1 || retlen == 0) { 345 *out = 0; 346 return FALSE; 347 } 348 *out = PL_uudmap[val] & 077; 349 *s += retlen; 350 return TRUE; 351 } 352 353 STATIC char * 354 S_bytes_to_uni(const U8 *start, STRLEN len, char *dest, const bool needs_swap) { 355 PERL_ARGS_ASSERT_BYTES_TO_UNI; 356 357 if (UNLIKELY(needs_swap)) { 358 const U8 *p = start + len; 359 while (p-- > start) { 360 append_utf8_from_native_byte(*p, (U8 **) & dest); 361 } 362 } else { 363 const U8 * const end = start + len; 364 while (start < end) { 365 append_utf8_from_native_byte(*start, (U8 **) & dest); 366 start++; 367 } 368 } 369 return dest; 370 } 371 372 #define PUSH_BYTES(utf8, cur, buf, len, needs_swap) \ 373 STMT_START { \ 374 if (UNLIKELY(utf8)) \ 375 (cur) = S_bytes_to_uni((U8 *) buf, len, (cur), needs_swap); \ 376 else { \ 377 if (UNLIKELY(needs_swap)) \ 378 S_reverse_copy((char *)(buf), cur, len); \ 379 else \ 380 Copy(buf, cur, len, char); \ 381 (cur) += (len); \ 382 } \ 383 } STMT_END 384 385 #define GROWING(utf8, cat, start, cur, in_len) \ 386 STMT_START { \ 387 STRLEN glen = (in_len); \ 388 if (utf8) glen *= UTF8_EXPAND; \ 389 if ((cur) + glen >= (start) + SvLEN(cat)) { \ 390 (start) = sv_exp_grow(cat, glen); \ 391 (cur) = (start) + SvCUR(cat); \ 392 } \ 393 } STMT_END 394 395 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \ 396 STMT_START { \ 397 const STRLEN glen = (in_len); \ 398 STRLEN gl = glen; \ 399 if (utf8) gl *= UTF8_EXPAND; \ 400 if ((cur) + gl >= (start) + SvLEN(cat)) { \ 401 *cur = '\0'; \ 402 SvCUR_set((cat), (cur) - (start)); \ 403 (start) = sv_exp_grow(cat, gl); \ 404 (cur) = (start) + SvCUR(cat); \ 405 } \ 406 PUSH_BYTES(utf8, cur, buf, glen, 0); \ 407 } STMT_END 408 409 #define PUSH_BYTE(utf8, s, byte) \ 410 STMT_START { \ 411 if (utf8) { \ 412 const U8 au8 = (byte); \ 413 (s) = S_bytes_to_uni(&au8, 1, (s), 0); \ 414 } else *(U8 *)(s)++ = (byte); \ 415 } STMT_END 416 417 /* Only to be used inside a loop (see the break) */ 418 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \ 419 STMT_START { \ 420 STRLEN retlen; \ 421 if (str >= end) break; \ 422 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \ 423 if (retlen == (STRLEN) -1 || retlen == 0) { \ 424 *cur = '\0'; \ 425 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \ 426 } \ 427 str += retlen; \ 428 } STMT_END 429 430 static const char *_action( const tempsym_t* symptr ) 431 { 432 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack"); 433 } 434 435 /* Returns the sizeof() struct described by pat */ 436 STATIC I32 437 S_measure_struct(pTHX_ tempsym_t* symptr) 438 { 439 I32 total = 0; 440 441 PERL_ARGS_ASSERT_MEASURE_STRUCT; 442 443 while (next_symbol(symptr)) { 444 I32 len; 445 int size; 446 447 switch (symptr->howlen) { 448 case e_star: 449 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s", 450 _action( symptr ) ); 451 break; 452 default: 453 /* e_no_len and e_number */ 454 len = symptr->length; 455 break; 456 } 457 458 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK; 459 if (!size) { 460 int star; 461 /* endianness doesn't influence the size of a type */ 462 switch(TYPE_NO_ENDIANNESS(symptr->code)) { 463 default: 464 Perl_croak(aTHX_ "Invalid type '%c' in %s", 465 (int)TYPE_NO_MODIFIERS(symptr->code), 466 _action( symptr ) ); 467 case '.' | TYPE_IS_SHRIEKING: 468 case '@' | TYPE_IS_SHRIEKING: 469 case '@': 470 case '.': 471 case '/': 472 case 'U': /* XXXX Is it correct? */ 473 case 'w': 474 case 'u': 475 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s", 476 (int) TYPE_NO_MODIFIERS(symptr->code), 477 _action( symptr ) ); 478 case '%': 479 size = 0; 480 break; 481 case '(': 482 { 483 tempsym_t savsym = *symptr; 484 symptr->patptr = savsym.grpbeg; 485 symptr->patend = savsym.grpend; 486 /* XXXX Theoretically, we need to measure many times at 487 different positions, since the subexpression may contain 488 alignment commands, but be not of aligned length. 489 Need to detect this and croak(). */ 490 size = measure_struct(symptr); 491 *symptr = savsym; 492 break; 493 } 494 case 'X' | TYPE_IS_SHRIEKING: 495 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS. 496 */ 497 if (!len) /* Avoid division by 0 */ 498 len = 1; 499 len = total % len; /* Assumed: the start is aligned. */ 500 /* FALL THROUGH */ 501 case 'X': 502 size = -1; 503 if (total < len) 504 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) ); 505 break; 506 case 'x' | TYPE_IS_SHRIEKING: 507 if (!len) /* Avoid division by 0 */ 508 len = 1; 509 star = total % len; /* Assumed: the start is aligned. */ 510 if (star) /* Other portable ways? */ 511 len = len - star; 512 else 513 len = 0; 514 /* FALL THROUGH */ 515 case 'x': 516 case 'A': 517 case 'Z': 518 case 'a': 519 size = 1; 520 break; 521 case 'B': 522 case 'b': 523 len = (len + 7)/8; 524 size = 1; 525 break; 526 case 'H': 527 case 'h': 528 len = (len + 1)/2; 529 size = 1; 530 break; 531 532 case 'P': 533 len = 1; 534 size = sizeof(char*); 535 break; 536 } 537 } 538 total += len * size; 539 } 540 return total; 541 } 542 543 544 /* locate matching closing parenthesis or bracket 545 * returns char pointer to char after match, or NULL 546 */ 547 STATIC const char * 548 S_group_end(pTHX_ const char *patptr, const char *patend, char ender) 549 { 550 PERL_ARGS_ASSERT_GROUP_END; 551 552 while (patptr < patend) { 553 const char c = *patptr++; 554 555 if (isSPACE(c)) 556 continue; 557 else if (c == ender) 558 return patptr-1; 559 else if (c == '#') { 560 while (patptr < patend && *patptr != '\n') 561 patptr++; 562 continue; 563 } else if (c == '(') 564 patptr = group_end(patptr, patend, ')') + 1; 565 else if (c == '[') 566 patptr = group_end(patptr, patend, ']') + 1; 567 } 568 Perl_croak(aTHX_ "No group ending character '%c' found in template", 569 ender); 570 return 0; 571 } 572 573 574 /* Convert unsigned decimal number to binary. 575 * Expects a pointer to the first digit and address of length variable 576 * Advances char pointer to 1st non-digit char and returns number 577 */ 578 STATIC const char * 579 S_get_num(pTHX_ const char *patptr, I32 *lenptr ) 580 { 581 I32 len = *patptr++ - '0'; 582 583 PERL_ARGS_ASSERT_GET_NUM; 584 585 while (isDIGIT(*patptr)) { 586 if (len >= 0x7FFFFFFF/10) 587 Perl_croak(aTHX_ "pack/unpack repeat count overflow"); 588 len = (len * 10) + (*patptr++ - '0'); 589 } 590 *lenptr = len; 591 return patptr; 592 } 593 594 /* The marvellous template parsing routine: Using state stored in *symptr, 595 * locates next template code and count 596 */ 597 STATIC bool 598 S_next_symbol(pTHX_ tempsym_t* symptr ) 599 { 600 const char* patptr = symptr->patptr; 601 const char* const patend = symptr->patend; 602 603 PERL_ARGS_ASSERT_NEXT_SYMBOL; 604 605 symptr->flags &= ~FLAG_SLASH; 606 607 while (patptr < patend) { 608 if (isSPACE(*patptr)) 609 patptr++; 610 else if (*patptr == '#') { 611 patptr++; 612 while (patptr < patend && *patptr != '\n') 613 patptr++; 614 if (patptr < patend) 615 patptr++; 616 } else { 617 /* We should have found a template code */ 618 I32 code = *patptr++ & 0xFF; 619 U32 inherited_modifiers = 0; 620 621 if (code == ','){ /* grandfather in commas but with a warning */ 622 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){ 623 symptr->flags |= FLAG_COMMA; 624 Perl_warner(aTHX_ packWARN(WARN_UNPACK), 625 "Invalid type ',' in %s", _action( symptr ) ); 626 } 627 continue; 628 } 629 630 /* for '(', skip to ')' */ 631 if (code == '(') { 632 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' ) 633 Perl_croak(aTHX_ "()-group starts with a count in %s", 634 _action( symptr ) ); 635 symptr->grpbeg = patptr; 636 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') ); 637 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL ) 638 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s", 639 _action( symptr ) ); 640 } 641 642 /* look for group modifiers to inherit */ 643 if (TYPE_ENDIANNESS(symptr->flags)) { 644 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code))) 645 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags); 646 } 647 648 /* look for modifiers */ 649 while (patptr < patend) { 650 const char *allowed; 651 I32 modifier; 652 switch (*patptr) { 653 case '!': 654 modifier = TYPE_IS_SHRIEKING; 655 allowed = "sSiIlLxXnNvV@."; 656 break; 657 case '>': 658 modifier = TYPE_IS_BIG_ENDIAN; 659 allowed = ENDIANNESS_ALLOWED_TYPES; 660 break; 661 case '<': 662 modifier = TYPE_IS_LITTLE_ENDIAN; 663 allowed = ENDIANNESS_ALLOWED_TYPES; 664 break; 665 default: 666 allowed = ""; 667 modifier = 0; 668 break; 669 } 670 671 if (modifier == 0) 672 break; 673 674 if (!strchr(allowed, TYPE_NO_MODIFIERS(code))) 675 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr, 676 allowed, _action( symptr ) ); 677 678 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK) 679 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s", 680 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) ); 681 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) == 682 TYPE_ENDIANNESS_MASK) 683 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s", 684 *patptr, _action( symptr ) ); 685 686 if ((code & modifier)) { 687 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK), 688 "Duplicate modifier '%c' after '%c' in %s", 689 *patptr, (int) TYPE_NO_MODIFIERS(code), 690 _action( symptr ) ); 691 } 692 693 code |= modifier; 694 patptr++; 695 } 696 697 /* inherit modifiers */ 698 code |= inherited_modifiers; 699 700 /* look for count and/or / */ 701 if (patptr < patend) { 702 if (isDIGIT(*patptr)) { 703 patptr = get_num( patptr, &symptr->length ); 704 symptr->howlen = e_number; 705 706 } else if (*patptr == '*') { 707 patptr++; 708 symptr->howlen = e_star; 709 710 } else if (*patptr == '[') { 711 const char* lenptr = ++patptr; 712 symptr->howlen = e_number; 713 patptr = group_end( patptr, patend, ']' ) + 1; 714 /* what kind of [] is it? */ 715 if (isDIGIT(*lenptr)) { 716 lenptr = get_num( lenptr, &symptr->length ); 717 if( *lenptr != ']' ) 718 Perl_croak(aTHX_ "Malformed integer in [] in %s", 719 _action( symptr ) ); 720 } else { 721 tempsym_t savsym = *symptr; 722 symptr->patend = patptr-1; 723 symptr->patptr = lenptr; 724 savsym.length = measure_struct(symptr); 725 *symptr = savsym; 726 } 727 } else { 728 symptr->howlen = e_no_len; 729 symptr->length = 1; 730 } 731 732 /* try to find / */ 733 while (patptr < patend) { 734 if (isSPACE(*patptr)) 735 patptr++; 736 else if (*patptr == '#') { 737 patptr++; 738 while (patptr < patend && *patptr != '\n') 739 patptr++; 740 if (patptr < patend) 741 patptr++; 742 } else { 743 if (*patptr == '/') { 744 symptr->flags |= FLAG_SLASH; 745 patptr++; 746 if (patptr < patend && 747 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '[')) 748 Perl_croak(aTHX_ "'/' does not take a repeat count in %s", 749 _action( symptr ) ); 750 } 751 break; 752 } 753 } 754 } else { 755 /* at end - no count, no / */ 756 symptr->howlen = e_no_len; 757 symptr->length = 1; 758 } 759 760 symptr->code = code; 761 symptr->patptr = patptr; 762 return TRUE; 763 } 764 } 765 symptr->patptr = patptr; 766 return FALSE; 767 } 768 769 /* 770 There is no way to cleanly handle the case where we should process the 771 string per byte in its upgraded form while it's really in downgraded form 772 (e.g. estimates like strend-s as an upper bound for the number of 773 characters left wouldn't work). So if we foresee the need of this 774 (pattern starts with U or contains U0), we want to work on the encoded 775 version of the string. Users are advised to upgrade their pack string 776 themselves if they need to do a lot of unpacks like this on it 777 */ 778 STATIC bool 779 need_utf8(const char *pat, const char *patend) 780 { 781 bool first = TRUE; 782 783 PERL_ARGS_ASSERT_NEED_UTF8; 784 785 while (pat < patend) { 786 if (pat[0] == '#') { 787 pat++; 788 pat = (const char *) memchr(pat, '\n', patend-pat); 789 if (!pat) return FALSE; 790 } else if (pat[0] == 'U') { 791 if (first || pat[1] == '0') return TRUE; 792 } else first = FALSE; 793 pat++; 794 } 795 return FALSE; 796 } 797 798 STATIC char 799 first_symbol(const char *pat, const char *patend) { 800 PERL_ARGS_ASSERT_FIRST_SYMBOL; 801 802 while (pat < patend) { 803 if (pat[0] != '#') return pat[0]; 804 pat++; 805 pat = (const char *) memchr(pat, '\n', patend-pat); 806 if (!pat) return 0; 807 pat++; 808 } 809 return 0; 810 } 811 812 /* 813 =for apidoc unpackstring 814 815 The engine implementing the unpack() Perl function. 816 817 Using the template pat..patend, this function unpacks the string 818 s..strend into a number of mortal SVs, which it pushes onto the perl 819 argument (@_) stack (so you will need to issue a C<PUTBACK> before and 820 C<SPAGAIN> after the call to this function). It returns the number of 821 pushed elements. 822 823 The strend and patend pointers should point to the byte following the last 824 character of each string. 825 826 Although this function returns its values on the perl argument stack, it 827 doesn't take any parameters from that stack (and thus in particular 828 there's no need to do a PUSHMARK before calling it, unlike L</call_pv> for 829 example). 830 831 =cut */ 832 833 I32 834 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags) 835 { 836 tempsym_t sym; 837 838 PERL_ARGS_ASSERT_UNPACKSTRING; 839 840 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8; 841 else if (need_utf8(pat, patend)) { 842 /* We probably should try to avoid this in case a scalar context call 843 wouldn't get to the "U0" */ 844 STRLEN len = strend - s; 845 s = (char *) bytes_to_utf8((U8 *) s, &len); 846 SAVEFREEPV(s); 847 strend = s + len; 848 flags |= FLAG_DO_UTF8; 849 } 850 851 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8)) 852 flags |= FLAG_PARSE_UTF8; 853 854 TEMPSYM_INIT(&sym, pat, patend, flags); 855 856 return unpack_rec(&sym, s, s, strend, NULL ); 857 } 858 859 STATIC I32 860 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s ) 861 { 862 dVAR; dSP; 863 SV *sv = NULL; 864 const I32 start_sp_offset = SP - PL_stack_base; 865 howlen_t howlen; 866 I32 checksum = 0; 867 UV cuv = 0; 868 NV cdouble = 0.0; 869 const int bits_in_uv = CHAR_BIT * sizeof(cuv); 870 bool beyond = FALSE; 871 bool explicit_length; 872 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0; 873 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0; 874 875 PERL_ARGS_ASSERT_UNPACK_REC; 876 877 symptr->strbeg = s - strbeg; 878 879 while (next_symbol(symptr)) { 880 packprops_t props; 881 I32 len; 882 I32 datumtype = symptr->code; 883 bool needs_swap; 884 /* do first one only unless in list context 885 / is implemented by unpacking the count, then popping it from the 886 stack, so must check that we're not in the middle of a / */ 887 if ( unpack_only_one 888 && (SP - PL_stack_base == start_sp_offset + 1) 889 && (datumtype != '/') ) /* XXX can this be omitted */ 890 break; 891 892 switch (howlen = symptr->howlen) { 893 case e_star: 894 len = strend - strbeg; /* long enough */ 895 break; 896 default: 897 /* e_no_len and e_number */ 898 len = symptr->length; 899 break; 900 } 901 902 explicit_length = TRUE; 903 redo_switch: 904 beyond = s >= strend; 905 906 props = packprops[TYPE_NO_ENDIANNESS(datumtype)]; 907 if (props) { 908 /* props nonzero means we can process this letter. */ 909 const long size = props & PACK_SIZE_MASK; 910 const long howmany = (strend - s) / size; 911 if (len > howmany) 912 len = howmany; 913 914 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) { 915 if (len && unpack_only_one) len = 1; 916 EXTEND(SP, len); 917 EXTEND_MORTAL(len); 918 } 919 } 920 921 needs_swap = NEEDS_SWAP(datumtype); 922 923 switch(TYPE_NO_ENDIANNESS(datumtype)) { 924 default: 925 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) ); 926 927 case '%': 928 if (howlen == e_no_len) 929 len = 16; /* len is not specified */ 930 checksum = len; 931 cuv = 0; 932 cdouble = 0; 933 continue; 934 break; 935 case '(': 936 { 937 tempsym_t savsym = *symptr; 938 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags); 939 symptr->flags |= group_modifiers; 940 symptr->patend = savsym.grpend; 941 symptr->previous = &savsym; 942 symptr->level++; 943 PUTBACK; 944 if (len && unpack_only_one) len = 1; 945 while (len--) { 946 symptr->patptr = savsym.grpbeg; 947 if (utf8) symptr->flags |= FLAG_PARSE_UTF8; 948 else symptr->flags &= ~FLAG_PARSE_UTF8; 949 unpack_rec(symptr, s, strbeg, strend, &s); 950 if (s == strend && savsym.howlen == e_star) 951 break; /* No way to continue */ 952 } 953 SPAGAIN; 954 savsym.flags = symptr->flags & ~group_modifiers; 955 *symptr = savsym; 956 break; 957 } 958 case '.' | TYPE_IS_SHRIEKING: 959 case '.': { 960 const char *from; 961 SV *sv; 962 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING); 963 if (howlen == e_star) from = strbeg; 964 else if (len <= 0) from = s; 965 else { 966 tempsym_t *group = symptr; 967 968 while (--len && group) group = group->previous; 969 from = group ? strbeg + group->strbeg : strbeg; 970 } 971 sv = from <= s ? 972 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) : 973 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s))); 974 mXPUSHs(sv); 975 break; 976 } 977 case '@' | TYPE_IS_SHRIEKING: 978 case '@': 979 s = strbeg + symptr->strbeg; 980 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING)) 981 { 982 while (len > 0) { 983 if (s >= strend) 984 Perl_croak(aTHX_ "'@' outside of string in unpack"); 985 s += UTF8SKIP(s); 986 len--; 987 } 988 if (s > strend) 989 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack"); 990 } else { 991 if (strend-s < len) 992 Perl_croak(aTHX_ "'@' outside of string in unpack"); 993 s += len; 994 } 995 break; 996 case 'X' | TYPE_IS_SHRIEKING: 997 if (!len) /* Avoid division by 0 */ 998 len = 1; 999 if (utf8) { 1000 const char *hop, *last; 1001 I32 l = len; 1002 hop = last = strbeg; 1003 while (hop < s) { 1004 hop += UTF8SKIP(hop); 1005 if (--l == 0) { 1006 last = hop; 1007 l = len; 1008 } 1009 } 1010 if (last > s) 1011 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack"); 1012 s = last; 1013 break; 1014 } 1015 len = (s - strbeg) % len; 1016 /* FALL THROUGH */ 1017 case 'X': 1018 if (utf8) { 1019 while (len > 0) { 1020 if (s <= strbeg) 1021 Perl_croak(aTHX_ "'X' outside of string in unpack"); 1022 while (--s, UTF8_IS_CONTINUATION(*s)) { 1023 if (s <= strbeg) 1024 Perl_croak(aTHX_ "'X' outside of string in unpack"); 1025 } 1026 len--; 1027 } 1028 } else { 1029 if (len > s - strbeg) 1030 Perl_croak(aTHX_ "'X' outside of string in unpack" ); 1031 s -= len; 1032 } 1033 break; 1034 case 'x' | TYPE_IS_SHRIEKING: { 1035 I32 ai32; 1036 if (!len) /* Avoid division by 0 */ 1037 len = 1; 1038 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len; 1039 else ai32 = (s - strbeg) % len; 1040 if (ai32 == 0) break; 1041 len -= ai32; 1042 } 1043 /* FALL THROUGH */ 1044 case 'x': 1045 if (utf8) { 1046 while (len>0) { 1047 if (s >= strend) 1048 Perl_croak(aTHX_ "'x' outside of string in unpack"); 1049 s += UTF8SKIP(s); 1050 len--; 1051 } 1052 } else { 1053 if (len > strend - s) 1054 Perl_croak(aTHX_ "'x' outside of string in unpack"); 1055 s += len; 1056 } 1057 break; 1058 case '/': 1059 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack"); 1060 break; 1061 case 'A': 1062 case 'Z': 1063 case 'a': 1064 if (checksum) { 1065 /* Preliminary length estimate is assumed done in 'W' */ 1066 if (len > strend - s) len = strend - s; 1067 goto W_checksum; 1068 } 1069 if (utf8) { 1070 I32 l; 1071 const char *hop; 1072 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) { 1073 if (hop >= strend) { 1074 if (hop > strend) 1075 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack"); 1076 break; 1077 } 1078 } 1079 if (hop > strend) 1080 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack"); 1081 len = hop - s; 1082 } else if (len > strend - s) 1083 len = strend - s; 1084 1085 if (datumtype == 'Z') { 1086 /* 'Z' strips stuff after first null */ 1087 const char *ptr, *end; 1088 end = s + len; 1089 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break; 1090 sv = newSVpvn(s, ptr-s); 1091 if (howlen == e_star) /* exact for 'Z*' */ 1092 len = ptr-s + (ptr != strend ? 1 : 0); 1093 } else if (datumtype == 'A') { 1094 /* 'A' strips both nulls and spaces */ 1095 const char *ptr; 1096 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) { 1097 for (ptr = s+len-1; ptr >= s; ptr--) 1098 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) && 1099 !isSPACE_utf8(ptr)) break; 1100 if (ptr >= s) ptr += UTF8SKIP(ptr); 1101 else ptr++; 1102 if (ptr > s+len) 1103 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack"); 1104 } else { 1105 for (ptr = s+len-1; ptr >= s; ptr--) 1106 if (*ptr != 0 && !isSPACE(*ptr)) break; 1107 ptr++; 1108 } 1109 sv = newSVpvn(s, ptr-s); 1110 } else sv = newSVpvn(s, len); 1111 1112 if (utf8) { 1113 SvUTF8_on(sv); 1114 /* Undo any upgrade done due to need_utf8() */ 1115 if (!(symptr->flags & FLAG_WAS_UTF8)) 1116 sv_utf8_downgrade(sv, 0); 1117 } 1118 mXPUSHs(sv); 1119 s += len; 1120 break; 1121 case 'B': 1122 case 'b': { 1123 char *str; 1124 if (howlen == e_star || len > (strend - s) * 8) 1125 len = (strend - s) * 8; 1126 if (checksum) { 1127 if (utf8) 1128 while (len >= 8 && s < strend) { 1129 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)]; 1130 len -= 8; 1131 } 1132 else 1133 while (len >= 8) { 1134 cuv += PL_bitcount[*(U8 *)s++]; 1135 len -= 8; 1136 } 1137 if (len && s < strend) { 1138 U8 bits; 1139 bits = SHIFT_BYTE(utf8, s, strend, datumtype); 1140 if (datumtype == 'b') 1141 while (len-- > 0) { 1142 if (bits & 1) cuv++; 1143 bits >>= 1; 1144 } 1145 else 1146 while (len-- > 0) { 1147 if (bits & 0x80) cuv++; 1148 bits <<= 1; 1149 } 1150 } 1151 break; 1152 } 1153 1154 sv = sv_2mortal(newSV(len ? len : 1)); 1155 SvPOK_on(sv); 1156 str = SvPVX(sv); 1157 if (datumtype == 'b') { 1158 U8 bits = 0; 1159 const I32 ai32 = len; 1160 for (len = 0; len < ai32; len++) { 1161 if (len & 7) bits >>= 1; 1162 else if (utf8) { 1163 if (s >= strend) break; 1164 bits = uni_to_byte(aTHX_ &s, strend, datumtype); 1165 } else bits = *(U8 *) s++; 1166 *str++ = bits & 1 ? '1' : '0'; 1167 } 1168 } else { 1169 U8 bits = 0; 1170 const I32 ai32 = len; 1171 for (len = 0; len < ai32; len++) { 1172 if (len & 7) bits <<= 1; 1173 else if (utf8) { 1174 if (s >= strend) break; 1175 bits = uni_to_byte(aTHX_ &s, strend, datumtype); 1176 } else bits = *(U8 *) s++; 1177 *str++ = bits & 0x80 ? '1' : '0'; 1178 } 1179 } 1180 *str = '\0'; 1181 SvCUR_set(sv, str - SvPVX_const(sv)); 1182 XPUSHs(sv); 1183 break; 1184 } 1185 case 'H': 1186 case 'h': { 1187 char *str = NULL; 1188 /* Preliminary length estimate, acceptable for utf8 too */ 1189 if (howlen == e_star || len > (strend - s) * 2) 1190 len = (strend - s) * 2; 1191 if (!checksum) { 1192 sv = sv_2mortal(newSV(len ? len : 1)); 1193 SvPOK_on(sv); 1194 str = SvPVX(sv); 1195 } 1196 if (datumtype == 'h') { 1197 U8 bits = 0; 1198 I32 ai32 = len; 1199 for (len = 0; len < ai32; len++) { 1200 if (len & 1) bits >>= 4; 1201 else if (utf8) { 1202 if (s >= strend) break; 1203 bits = uni_to_byte(aTHX_ &s, strend, datumtype); 1204 } else bits = * (U8 *) s++; 1205 if (!checksum) 1206 *str++ = PL_hexdigit[bits & 15]; 1207 } 1208 } else { 1209 U8 bits = 0; 1210 const I32 ai32 = len; 1211 for (len = 0; len < ai32; len++) { 1212 if (len & 1) bits <<= 4; 1213 else if (utf8) { 1214 if (s >= strend) break; 1215 bits = uni_to_byte(aTHX_ &s, strend, datumtype); 1216 } else bits = *(U8 *) s++; 1217 if (!checksum) 1218 *str++ = PL_hexdigit[(bits >> 4) & 15]; 1219 } 1220 } 1221 if (!checksum) { 1222 *str = '\0'; 1223 SvCUR_set(sv, str - SvPVX_const(sv)); 1224 XPUSHs(sv); 1225 } 1226 break; 1227 } 1228 case 'C': 1229 if (len == 0) { 1230 if (explicit_length) 1231 /* Switch to "character" mode */ 1232 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0; 1233 break; 1234 } 1235 /* FALL THROUGH */ 1236 case 'c': 1237 while (len-- > 0 && s < strend) { 1238 int aint; 1239 if (utf8) 1240 { 1241 STRLEN retlen; 1242 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen, 1243 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY); 1244 if (retlen == (STRLEN) -1 || retlen == 0) 1245 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack"); 1246 s += retlen; 1247 } 1248 else 1249 aint = *(U8 *)(s)++; 1250 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */ 1251 aint -= 256; 1252 if (!checksum) 1253 mPUSHi(aint); 1254 else if (checksum > bits_in_uv) 1255 cdouble += (NV)aint; 1256 else 1257 cuv += aint; 1258 } 1259 break; 1260 case 'W': 1261 W_checksum: 1262 if (utf8) { 1263 while (len-- > 0 && s < strend) { 1264 STRLEN retlen; 1265 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen, 1266 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY); 1267 if (retlen == (STRLEN) -1 || retlen == 0) 1268 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack"); 1269 s += retlen; 1270 if (!checksum) 1271 mPUSHu(val); 1272 else if (checksum > bits_in_uv) 1273 cdouble += (NV) val; 1274 else 1275 cuv += val; 1276 } 1277 } else if (!checksum) 1278 while (len-- > 0) { 1279 const U8 ch = *(U8 *) s++; 1280 mPUSHu(ch); 1281 } 1282 else if (checksum > bits_in_uv) 1283 while (len-- > 0) cdouble += (NV) *(U8 *) s++; 1284 else 1285 while (len-- > 0) cuv += *(U8 *) s++; 1286 break; 1287 case 'U': 1288 if (len == 0) { 1289 if (explicit_length && howlen != e_star) { 1290 /* Switch to "bytes in UTF-8" mode */ 1291 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0; 1292 else 1293 /* Should be impossible due to the need_utf8() test */ 1294 Perl_croak(aTHX_ "U0 mode on a byte string"); 1295 } 1296 break; 1297 } 1298 if (len > strend - s) len = strend - s; 1299 if (!checksum) { 1300 if (len && unpack_only_one) len = 1; 1301 EXTEND(SP, len); 1302 EXTEND_MORTAL(len); 1303 } 1304 while (len-- > 0 && s < strend) { 1305 STRLEN retlen; 1306 UV auv; 1307 if (utf8) { 1308 U8 result[UTF8_MAXLEN]; 1309 const char *ptr = s; 1310 STRLEN len; 1311 /* Bug: warns about bad utf8 even if we are short on bytes 1312 and will break out of the loop */ 1313 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1, 1314 'U')) 1315 break; 1316 len = UTF8SKIP(result); 1317 if (!uni_to_bytes(aTHX_ &ptr, strend, 1318 (char *) &result[1], len-1, 'U')) break; 1319 auv = utf8n_to_uvchr(result, len, &retlen, UTF8_ALLOW_DEFAULT); 1320 s = ptr; 1321 } else { 1322 auv = utf8n_to_uvchr((U8*)s, strend - s, &retlen, UTF8_ALLOW_DEFAULT); 1323 if (retlen == (STRLEN) -1 || retlen == 0) 1324 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack"); 1325 s += retlen; 1326 } 1327 if (!checksum) 1328 mPUSHu(auv); 1329 else if (checksum > bits_in_uv) 1330 cdouble += (NV) auv; 1331 else 1332 cuv += auv; 1333 } 1334 break; 1335 case 's' | TYPE_IS_SHRIEKING: 1336 #if SHORTSIZE != SIZE16 1337 while (len-- > 0) { 1338 short ashort; 1339 SHIFT_VAR(utf8, s, strend, ashort, datumtype, needs_swap); 1340 if (!checksum) 1341 mPUSHi(ashort); 1342 else if (checksum > bits_in_uv) 1343 cdouble += (NV)ashort; 1344 else 1345 cuv += ashort; 1346 } 1347 break; 1348 #else 1349 /* Fallthrough! */ 1350 #endif 1351 case 's': 1352 while (len-- > 0) { 1353 I16 ai16; 1354 1355 #if U16SIZE > SIZE16 1356 ai16 = 0; 1357 #endif 1358 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap); 1359 #if U16SIZE > SIZE16 1360 if (ai16 > 32767) 1361 ai16 -= 65536; 1362 #endif 1363 if (!checksum) 1364 mPUSHi(ai16); 1365 else if (checksum > bits_in_uv) 1366 cdouble += (NV)ai16; 1367 else 1368 cuv += ai16; 1369 } 1370 break; 1371 case 'S' | TYPE_IS_SHRIEKING: 1372 #if SHORTSIZE != SIZE16 1373 while (len-- > 0) { 1374 unsigned short aushort; 1375 SHIFT_VAR(utf8, s, strend, aushort, datumtype, needs_swap, 1376 needs_swap); 1377 if (!checksum) 1378 mPUSHu(aushort); 1379 else if (checksum > bits_in_uv) 1380 cdouble += (NV)aushort; 1381 else 1382 cuv += aushort; 1383 } 1384 break; 1385 #else 1386 /* Fallthrough! */ 1387 #endif 1388 case 'v': 1389 case 'n': 1390 case 'S': 1391 while (len-- > 0) { 1392 U16 au16; 1393 #if U16SIZE > SIZE16 1394 au16 = 0; 1395 #endif 1396 SHIFT16(utf8, s, strend, &au16, datumtype, needs_swap); 1397 if (datumtype == 'n') 1398 au16 = PerlSock_ntohs(au16); 1399 if (datumtype == 'v') 1400 au16 = vtohs(au16); 1401 if (!checksum) 1402 mPUSHu(au16); 1403 else if (checksum > bits_in_uv) 1404 cdouble += (NV) au16; 1405 else 1406 cuv += au16; 1407 } 1408 break; 1409 case 'v' | TYPE_IS_SHRIEKING: 1410 case 'n' | TYPE_IS_SHRIEKING: 1411 while (len-- > 0) { 1412 I16 ai16; 1413 # if U16SIZE > SIZE16 1414 ai16 = 0; 1415 # endif 1416 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap); 1417 /* There should never be any byte-swapping here. */ 1418 assert(!TYPE_ENDIANNESS(datumtype)); 1419 if (datumtype == ('n' | TYPE_IS_SHRIEKING)) 1420 ai16 = (I16) PerlSock_ntohs((U16) ai16); 1421 if (datumtype == ('v' | TYPE_IS_SHRIEKING)) 1422 ai16 = (I16) vtohs((U16) ai16); 1423 if (!checksum) 1424 mPUSHi(ai16); 1425 else if (checksum > bits_in_uv) 1426 cdouble += (NV) ai16; 1427 else 1428 cuv += ai16; 1429 } 1430 break; 1431 case 'i': 1432 case 'i' | TYPE_IS_SHRIEKING: 1433 while (len-- > 0) { 1434 int aint; 1435 SHIFT_VAR(utf8, s, strend, aint, datumtype, needs_swap); 1436 if (!checksum) 1437 mPUSHi(aint); 1438 else if (checksum > bits_in_uv) 1439 cdouble += (NV)aint; 1440 else 1441 cuv += aint; 1442 } 1443 break; 1444 case 'I': 1445 case 'I' | TYPE_IS_SHRIEKING: 1446 while (len-- > 0) { 1447 unsigned int auint; 1448 SHIFT_VAR(utf8, s, strend, auint, datumtype, needs_swap); 1449 if (!checksum) 1450 mPUSHu(auint); 1451 else if (checksum > bits_in_uv) 1452 cdouble += (NV)auint; 1453 else 1454 cuv += auint; 1455 } 1456 break; 1457 case 'j': 1458 while (len-- > 0) { 1459 IV aiv; 1460 SHIFT_VAR(utf8, s, strend, aiv, datumtype, needs_swap); 1461 if (!checksum) 1462 mPUSHi(aiv); 1463 else if (checksum > bits_in_uv) 1464 cdouble += (NV)aiv; 1465 else 1466 cuv += aiv; 1467 } 1468 break; 1469 case 'J': 1470 while (len-- > 0) { 1471 UV auv; 1472 SHIFT_VAR(utf8, s, strend, auv, datumtype, needs_swap); 1473 if (!checksum) 1474 mPUSHu(auv); 1475 else if (checksum > bits_in_uv) 1476 cdouble += (NV)auv; 1477 else 1478 cuv += auv; 1479 } 1480 break; 1481 case 'l' | TYPE_IS_SHRIEKING: 1482 #if LONGSIZE != SIZE32 1483 while (len-- > 0) { 1484 long along; 1485 SHIFT_VAR(utf8, s, strend, along, datumtype, needs_swap); 1486 if (!checksum) 1487 mPUSHi(along); 1488 else if (checksum > bits_in_uv) 1489 cdouble += (NV)along; 1490 else 1491 cuv += along; 1492 } 1493 break; 1494 #else 1495 /* Fallthrough! */ 1496 #endif 1497 case 'l': 1498 while (len-- > 0) { 1499 I32 ai32; 1500 #if U32SIZE > SIZE32 1501 ai32 = 0; 1502 #endif 1503 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap); 1504 #if U32SIZE > SIZE32 1505 if (ai32 > 2147483647) ai32 -= 4294967296; 1506 #endif 1507 if (!checksum) 1508 mPUSHi(ai32); 1509 else if (checksum > bits_in_uv) 1510 cdouble += (NV)ai32; 1511 else 1512 cuv += ai32; 1513 } 1514 break; 1515 case 'L' | TYPE_IS_SHRIEKING: 1516 #if LONGSIZE != SIZE32 1517 while (len-- > 0) { 1518 unsigned long aulong; 1519 SHIFT_VAR(utf8, s, strend, aulong, datumtype, needs_swap); 1520 if (!checksum) 1521 mPUSHu(aulong); 1522 else if (checksum > bits_in_uv) 1523 cdouble += (NV)aulong; 1524 else 1525 cuv += aulong; 1526 } 1527 break; 1528 #else 1529 /* Fall through! */ 1530 #endif 1531 case 'V': 1532 case 'N': 1533 case 'L': 1534 while (len-- > 0) { 1535 U32 au32; 1536 #if U32SIZE > SIZE32 1537 au32 = 0; 1538 #endif 1539 SHIFT32(utf8, s, strend, &au32, datumtype, needs_swap); 1540 if (datumtype == 'N') 1541 au32 = PerlSock_ntohl(au32); 1542 if (datumtype == 'V') 1543 au32 = vtohl(au32); 1544 if (!checksum) 1545 mPUSHu(au32); 1546 else if (checksum > bits_in_uv) 1547 cdouble += (NV)au32; 1548 else 1549 cuv += au32; 1550 } 1551 break; 1552 case 'V' | TYPE_IS_SHRIEKING: 1553 case 'N' | TYPE_IS_SHRIEKING: 1554 while (len-- > 0) { 1555 I32 ai32; 1556 #if U32SIZE > SIZE32 1557 ai32 = 0; 1558 #endif 1559 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap); 1560 /* There should never be any byte swapping here. */ 1561 assert(!TYPE_ENDIANNESS(datumtype)); 1562 if (datumtype == ('N' | TYPE_IS_SHRIEKING)) 1563 ai32 = (I32)PerlSock_ntohl((U32)ai32); 1564 if (datumtype == ('V' | TYPE_IS_SHRIEKING)) 1565 ai32 = (I32)vtohl((U32)ai32); 1566 if (!checksum) 1567 mPUSHi(ai32); 1568 else if (checksum > bits_in_uv) 1569 cdouble += (NV)ai32; 1570 else 1571 cuv += ai32; 1572 } 1573 break; 1574 case 'p': 1575 while (len-- > 0) { 1576 const char *aptr; 1577 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap); 1578 /* newSVpv generates undef if aptr is NULL */ 1579 mPUSHs(newSVpv(aptr, 0)); 1580 } 1581 break; 1582 case 'w': 1583 { 1584 UV auv = 0; 1585 U32 bytes = 0; 1586 1587 while (len > 0 && s < strend) { 1588 U8 ch; 1589 ch = SHIFT_BYTE(utf8, s, strend, datumtype); 1590 auv = (auv << 7) | (ch & 0x7f); 1591 /* UTF8_IS_XXXXX not right here - using constant 0x80 */ 1592 if (ch < 0x80) { 1593 bytes = 0; 1594 mPUSHu(auv); 1595 len--; 1596 auv = 0; 1597 continue; 1598 } 1599 if (++bytes >= sizeof(UV)) { /* promote to string */ 1600 const char *t; 1601 1602 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv); 1603 while (s < strend) { 1604 ch = SHIFT_BYTE(utf8, s, strend, datumtype); 1605 sv = mul128(sv, (U8)(ch & 0x7f)); 1606 if (!(ch & 0x80)) { 1607 bytes = 0; 1608 break; 1609 } 1610 } 1611 t = SvPV_nolen_const(sv); 1612 while (*t == '0') 1613 t++; 1614 sv_chop(sv, t); 1615 mPUSHs(sv); 1616 len--; 1617 auv = 0; 1618 } 1619 } 1620 if ((s >= strend) && bytes) 1621 Perl_croak(aTHX_ "Unterminated compressed integer in unpack"); 1622 } 1623 break; 1624 case 'P': 1625 if (symptr->howlen == e_star) 1626 Perl_croak(aTHX_ "'P' must have an explicit size in unpack"); 1627 EXTEND(SP, 1); 1628 if (s + sizeof(char*) <= strend) { 1629 char *aptr; 1630 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap); 1631 /* newSVpvn generates undef if aptr is NULL */ 1632 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP)); 1633 } 1634 break; 1635 #if IVSIZE >= 8 1636 case 'q': 1637 while (len-- > 0) { 1638 Quad_t aquad; 1639 SHIFT_VAR(utf8, s, strend, aquad, datumtype, needs_swap); 1640 if (!checksum) 1641 mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ? 1642 newSViv((IV)aquad) : newSVnv((NV)aquad)); 1643 else if (checksum > bits_in_uv) 1644 cdouble += (NV)aquad; 1645 else 1646 cuv += aquad; 1647 } 1648 break; 1649 case 'Q': 1650 while (len-- > 0) { 1651 Uquad_t auquad; 1652 SHIFT_VAR(utf8, s, strend, auquad, datumtype, needs_swap); 1653 if (!checksum) 1654 mPUSHs(auquad <= UV_MAX ? 1655 newSVuv((UV)auquad) : newSVnv((NV)auquad)); 1656 else if (checksum > bits_in_uv) 1657 cdouble += (NV)auquad; 1658 else 1659 cuv += auquad; 1660 } 1661 break; 1662 #endif 1663 /* float and double added gnb@melba.bby.oz.au 22/11/89 */ 1664 case 'f': 1665 while (len-- > 0) { 1666 float afloat; 1667 SHIFT_VAR(utf8, s, strend, afloat, datumtype, needs_swap); 1668 if (!checksum) 1669 mPUSHn(afloat); 1670 else 1671 cdouble += afloat; 1672 } 1673 break; 1674 case 'd': 1675 while (len-- > 0) { 1676 double adouble; 1677 SHIFT_VAR(utf8, s, strend, adouble, datumtype, needs_swap); 1678 if (!checksum) 1679 mPUSHn(adouble); 1680 else 1681 cdouble += adouble; 1682 } 1683 break; 1684 case 'F': 1685 while (len-- > 0) { 1686 NV_bytes anv; 1687 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes), 1688 datumtype, needs_swap); 1689 if (!checksum) 1690 mPUSHn(anv.nv); 1691 else 1692 cdouble += anv.nv; 1693 } 1694 break; 1695 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE) 1696 case 'D': 1697 while (len-- > 0) { 1698 ld_bytes aldouble; 1699 SHIFT_BYTES(utf8, s, strend, aldouble.bytes, 1700 sizeof(aldouble.bytes), datumtype, needs_swap); 1701 if (!checksum) 1702 mPUSHn(aldouble.ld); 1703 else 1704 cdouble += aldouble.ld; 1705 } 1706 break; 1707 #endif 1708 case 'u': 1709 if (!checksum) { 1710 const STRLEN l = (STRLEN) (strend - s) * 3 / 4; 1711 sv = sv_2mortal(newSV(l)); 1712 if (l) SvPOK_on(sv); 1713 } 1714 if (utf8) { 1715 while (next_uni_uu(aTHX_ &s, strend, &len)) { 1716 I32 a, b, c, d; 1717 char hunk[3]; 1718 1719 while (len > 0) { 1720 next_uni_uu(aTHX_ &s, strend, &a); 1721 next_uni_uu(aTHX_ &s, strend, &b); 1722 next_uni_uu(aTHX_ &s, strend, &c); 1723 next_uni_uu(aTHX_ &s, strend, &d); 1724 hunk[0] = (char)((a << 2) | (b >> 4)); 1725 hunk[1] = (char)((b << 4) | (c >> 2)); 1726 hunk[2] = (char)((c << 6) | d); 1727 if (!checksum) 1728 sv_catpvn(sv, hunk, (len > 3) ? 3 : len); 1729 len -= 3; 1730 } 1731 if (s < strend) { 1732 if (*s == '\n') { 1733 s++; 1734 } 1735 else { 1736 /* possible checksum byte */ 1737 const char *skip = s+UTF8SKIP(s); 1738 if (skip < strend && *skip == '\n') 1739 s = skip+1; 1740 } 1741 } 1742 } 1743 } else { 1744 while (s < strend && *s > ' ' && ISUUCHAR(*s)) { 1745 I32 a, b, c, d; 1746 char hunk[3]; 1747 1748 len = PL_uudmap[*(U8*)s++] & 077; 1749 while (len > 0) { 1750 if (s < strend && ISUUCHAR(*s)) 1751 a = PL_uudmap[*(U8*)s++] & 077; 1752 else 1753 a = 0; 1754 if (s < strend && ISUUCHAR(*s)) 1755 b = PL_uudmap[*(U8*)s++] & 077; 1756 else 1757 b = 0; 1758 if (s < strend && ISUUCHAR(*s)) 1759 c = PL_uudmap[*(U8*)s++] & 077; 1760 else 1761 c = 0; 1762 if (s < strend && ISUUCHAR(*s)) 1763 d = PL_uudmap[*(U8*)s++] & 077; 1764 else 1765 d = 0; 1766 hunk[0] = (char)((a << 2) | (b >> 4)); 1767 hunk[1] = (char)((b << 4) | (c >> 2)); 1768 hunk[2] = (char)((c << 6) | d); 1769 if (!checksum) 1770 sv_catpvn(sv, hunk, (len > 3) ? 3 : len); 1771 len -= 3; 1772 } 1773 if (*s == '\n') 1774 s++; 1775 else /* possible checksum byte */ 1776 if (s + 1 < strend && s[1] == '\n') 1777 s += 2; 1778 } 1779 } 1780 if (!checksum) 1781 XPUSHs(sv); 1782 break; 1783 } 1784 1785 if (checksum) { 1786 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) || 1787 (checksum > bits_in_uv && 1788 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) { 1789 NV trouble, anv; 1790 1791 anv = (NV) (1 << (checksum & 15)); 1792 while (checksum >= 16) { 1793 checksum -= 16; 1794 anv *= 65536.0; 1795 } 1796 while (cdouble < 0.0) 1797 cdouble += anv; 1798 cdouble = Perl_modf(cdouble / anv, &trouble) * anv; 1799 sv = newSVnv(cdouble); 1800 } 1801 else { 1802 if (checksum < bits_in_uv) { 1803 UV mask = ((UV)1 << checksum) - 1; 1804 cuv &= mask; 1805 } 1806 sv = newSVuv(cuv); 1807 } 1808 mXPUSHs(sv); 1809 checksum = 0; 1810 } 1811 1812 if (symptr->flags & FLAG_SLASH){ 1813 if (SP - PL_stack_base - start_sp_offset <= 0) 1814 break; 1815 if( next_symbol(symptr) ){ 1816 if( symptr->howlen == e_number ) 1817 Perl_croak(aTHX_ "Count after length/code in unpack" ); 1818 if( beyond ){ 1819 /* ...end of char buffer then no decent length available */ 1820 Perl_croak(aTHX_ "length/code after end of string in unpack" ); 1821 } else { 1822 /* take top of stack (hope it's numeric) */ 1823 len = POPi; 1824 if( len < 0 ) 1825 Perl_croak(aTHX_ "Negative '/' count in unpack" ); 1826 } 1827 } else { 1828 Perl_croak(aTHX_ "Code missing after '/' in unpack" ); 1829 } 1830 datumtype = symptr->code; 1831 explicit_length = FALSE; 1832 goto redo_switch; 1833 } 1834 } 1835 1836 if (new_s) 1837 *new_s = s; 1838 PUTBACK; 1839 return SP - PL_stack_base - start_sp_offset; 1840 } 1841 1842 PP(pp_unpack) 1843 { 1844 dVAR; 1845 dSP; 1846 dPOPPOPssrl; 1847 I32 gimme = GIMME_V; 1848 STRLEN llen; 1849 STRLEN rlen; 1850 const char *pat = SvPV_const(left, llen); 1851 const char *s = SvPV_const(right, rlen); 1852 const char *strend = s + rlen; 1853 const char *patend = pat + llen; 1854 I32 cnt; 1855 1856 PUTBACK; 1857 cnt = unpackstring(pat, patend, s, strend, 1858 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0) 1859 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0)); 1860 1861 SPAGAIN; 1862 if ( !cnt && gimme == G_SCALAR ) 1863 PUSHs(&PL_sv_undef); 1864 RETURN; 1865 } 1866 1867 STATIC U8 * 1868 doencodes(U8 *h, const char *s, I32 len) 1869 { 1870 *h++ = PL_uuemap[len]; 1871 while (len > 2) { 1872 *h++ = PL_uuemap[(077 & (s[0] >> 2))]; 1873 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))]; 1874 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))]; 1875 *h++ = PL_uuemap[(077 & (s[2] & 077))]; 1876 s += 3; 1877 len -= 3; 1878 } 1879 if (len > 0) { 1880 const char r = (len > 1 ? s[1] : '\0'); 1881 *h++ = PL_uuemap[(077 & (s[0] >> 2))]; 1882 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))]; 1883 *h++ = PL_uuemap[(077 & ((r << 2) & 074))]; 1884 *h++ = PL_uuemap[0]; 1885 } 1886 *h++ = '\n'; 1887 return h; 1888 } 1889 1890 STATIC SV * 1891 S_is_an_int(pTHX_ const char *s, STRLEN l) 1892 { 1893 SV *result = newSVpvn(s, l); 1894 char *const result_c = SvPV_nolen(result); /* convenience */ 1895 char *out = result_c; 1896 bool skip = 1; 1897 bool ignore = 0; 1898 1899 PERL_ARGS_ASSERT_IS_AN_INT; 1900 1901 while (*s) { 1902 switch (*s) { 1903 case ' ': 1904 break; 1905 case '+': 1906 if (!skip) { 1907 SvREFCNT_dec(result); 1908 return (NULL); 1909 } 1910 break; 1911 case '0': 1912 case '1': 1913 case '2': 1914 case '3': 1915 case '4': 1916 case '5': 1917 case '6': 1918 case '7': 1919 case '8': 1920 case '9': 1921 skip = 0; 1922 if (!ignore) { 1923 *(out++) = *s; 1924 } 1925 break; 1926 case '.': 1927 ignore = 1; 1928 break; 1929 default: 1930 SvREFCNT_dec(result); 1931 return (NULL); 1932 } 1933 s++; 1934 } 1935 *(out++) = '\0'; 1936 SvCUR_set(result, out - result_c); 1937 return (result); 1938 } 1939 1940 /* pnum must be '\0' terminated */ 1941 STATIC int 1942 S_div128(pTHX_ SV *pnum, bool *done) 1943 { 1944 STRLEN len; 1945 char * const s = SvPV(pnum, len); 1946 char *t = s; 1947 int m = 0; 1948 1949 PERL_ARGS_ASSERT_DIV128; 1950 1951 *done = 1; 1952 while (*t) { 1953 const int i = m * 10 + (*t - '0'); 1954 const int r = (i >> 7); /* r < 10 */ 1955 m = i & 0x7F; 1956 if (r) { 1957 *done = 0; 1958 } 1959 *(t++) = '0' + r; 1960 } 1961 *(t++) = '\0'; 1962 SvCUR_set(pnum, (STRLEN) (t - s)); 1963 return (m); 1964 } 1965 1966 /* 1967 =for apidoc packlist 1968 1969 The engine implementing pack() Perl function. 1970 1971 =cut 1972 */ 1973 1974 void 1975 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist ) 1976 { 1977 dVAR; 1978 tempsym_t sym; 1979 1980 PERL_ARGS_ASSERT_PACKLIST; 1981 1982 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK); 1983 1984 /* We're going to do changes through SvPVX(cat). Make sure it's valid. 1985 Also make sure any UTF8 flag is loaded */ 1986 SvPV_force_nolen(cat); 1987 if (DO_UTF8(cat)) 1988 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8; 1989 1990 (void)pack_rec( cat, &sym, beglist, endlist ); 1991 } 1992 1993 /* like sv_utf8_upgrade, but also repoint the group start markers */ 1994 STATIC void 1995 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) { 1996 STRLEN len; 1997 tempsym_t *group; 1998 const char *from_ptr, *from_start, *from_end, **marks, **m; 1999 char *to_start, *to_ptr; 2000 2001 if (SvUTF8(sv)) return; 2002 2003 from_start = SvPVX_const(sv); 2004 from_end = from_start + SvCUR(sv); 2005 for (from_ptr = from_start; from_ptr < from_end; from_ptr++) 2006 if (!NATIVE_BYTE_IS_INVARIANT(*from_ptr)) break; 2007 if (from_ptr == from_end) { 2008 /* Simple case: no character needs to be changed */ 2009 SvUTF8_on(sv); 2010 return; 2011 } 2012 2013 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1; 2014 Newx(to_start, len, char); 2015 Copy(from_start, to_start, from_ptr-from_start, char); 2016 to_ptr = to_start + (from_ptr-from_start); 2017 2018 Newx(marks, sym_ptr->level+2, const char *); 2019 for (group=sym_ptr; group; group = group->previous) 2020 marks[group->level] = from_start + group->strbeg; 2021 marks[sym_ptr->level+1] = from_end+1; 2022 for (m = marks; *m < from_ptr; m++) 2023 *m = to_start + (*m-from_start); 2024 2025 for (;from_ptr < from_end; from_ptr++) { 2026 while (*m == from_ptr) *m++ = to_ptr; 2027 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr); 2028 } 2029 *to_ptr = 0; 2030 2031 while (*m == from_ptr) *m++ = to_ptr; 2032 if (m != marks + sym_ptr->level+1) { 2033 Safefree(marks); 2034 Safefree(to_start); 2035 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, " 2036 "level=%d", m, marks, sym_ptr->level); 2037 } 2038 for (group=sym_ptr; group; group = group->previous) 2039 group->strbeg = marks[group->level] - to_start; 2040 Safefree(marks); 2041 2042 if (SvOOK(sv)) { 2043 if (SvIVX(sv)) { 2044 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv)); 2045 from_start -= SvIVX(sv); 2046 SvIV_set(sv, 0); 2047 } 2048 SvFLAGS(sv) &= ~SVf_OOK; 2049 } 2050 if (SvLEN(sv) != 0) 2051 Safefree(from_start); 2052 SvPV_set(sv, to_start); 2053 SvCUR_set(sv, to_ptr - to_start); 2054 SvLEN_set(sv, len); 2055 SvUTF8_on(sv); 2056 } 2057 2058 /* Exponential string grower. Makes string extension effectively O(n) 2059 needed says how many extra bytes we need (not counting the final '\0') 2060 Only grows the string if there is an actual lack of space 2061 */ 2062 STATIC char * 2063 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) { 2064 const STRLEN cur = SvCUR(sv); 2065 const STRLEN len = SvLEN(sv); 2066 STRLEN extend; 2067 2068 PERL_ARGS_ASSERT_SV_EXP_GROW; 2069 2070 if (len - cur > needed) return SvPVX(sv); 2071 extend = needed > len ? needed : len; 2072 return SvGROW(sv, len+extend+1); 2073 } 2074 2075 STATIC 2076 SV ** 2077 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist ) 2078 { 2079 dVAR; 2080 tempsym_t lookahead; 2081 I32 items = endlist - beglist; 2082 bool found = next_symbol(symptr); 2083 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0; 2084 bool warn_utf8 = ckWARN(WARN_UTF8); 2085 2086 PERL_ARGS_ASSERT_PACK_REC; 2087 2088 if (symptr->level == 0 && found && symptr->code == 'U') { 2089 marked_upgrade(aTHX_ cat, symptr); 2090 symptr->flags |= FLAG_DO_UTF8; 2091 utf8 = 0; 2092 } 2093 symptr->strbeg = SvCUR(cat); 2094 2095 while (found) { 2096 SV *fromstr; 2097 STRLEN fromlen; 2098 I32 len; 2099 SV *lengthcode = NULL; 2100 I32 datumtype = symptr->code; 2101 howlen_t howlen = symptr->howlen; 2102 char *start = SvPVX(cat); 2103 char *cur = start + SvCUR(cat); 2104 bool needs_swap; 2105 2106 #define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no) 2107 2108 switch (howlen) { 2109 case e_star: 2110 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ? 2111 0 : items; 2112 break; 2113 default: 2114 /* e_no_len and e_number */ 2115 len = symptr->length; 2116 break; 2117 } 2118 2119 if (len) { 2120 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)]; 2121 2122 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) { 2123 /* We can process this letter. */ 2124 STRLEN size = props & PACK_SIZE_MASK; 2125 GROWING(utf8, cat, start, cur, (STRLEN) len * size); 2126 } 2127 } 2128 2129 /* Look ahead for next symbol. Do we have code/code? */ 2130 lookahead = *symptr; 2131 found = next_symbol(&lookahead); 2132 if (symptr->flags & FLAG_SLASH) { 2133 IV count; 2134 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack"); 2135 if (strchr("aAZ", lookahead.code)) { 2136 if (lookahead.howlen == e_number) count = lookahead.length; 2137 else { 2138 if (items > 0) { 2139 count = sv_len_utf8(*beglist); 2140 } 2141 else count = 0; 2142 if (lookahead.code == 'Z') count++; 2143 } 2144 } else { 2145 if (lookahead.howlen == e_number && lookahead.length < items) 2146 count = lookahead.length; 2147 else count = items; 2148 } 2149 lookahead.howlen = e_number; 2150 lookahead.length = count; 2151 lengthcode = sv_2mortal(newSViv(count)); 2152 } 2153 2154 needs_swap = NEEDS_SWAP(datumtype); 2155 2156 /* Code inside the switch must take care to properly update 2157 cat (CUR length and '\0' termination) if it updated *cur and 2158 doesn't simply leave using break */ 2159 switch(TYPE_NO_ENDIANNESS(datumtype)) { 2160 default: 2161 Perl_croak(aTHX_ "Invalid type '%c' in pack", 2162 (int) TYPE_NO_MODIFIERS(datumtype)); 2163 case '%': 2164 Perl_croak(aTHX_ "'%%' may not be used in pack"); 2165 { 2166 char *from; 2167 case '.' | TYPE_IS_SHRIEKING: 2168 case '.': 2169 if (howlen == e_star) from = start; 2170 else if (len == 0) from = cur; 2171 else { 2172 tempsym_t *group = symptr; 2173 2174 while (--len && group) group = group->previous; 2175 from = group ? start + group->strbeg : start; 2176 } 2177 fromstr = NEXTFROM; 2178 len = SvIV(fromstr); 2179 goto resize; 2180 case '@' | TYPE_IS_SHRIEKING: 2181 case '@': 2182 from = start + symptr->strbeg; 2183 resize: 2184 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING)) 2185 if (len >= 0) { 2186 while (len && from < cur) { 2187 from += UTF8SKIP(from); 2188 len--; 2189 } 2190 if (from > cur) 2191 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); 2192 if (len) { 2193 /* Here we know from == cur */ 2194 grow: 2195 GROWING(0, cat, start, cur, len); 2196 Zero(cur, len, char); 2197 cur += len; 2198 } else if (from < cur) { 2199 len = cur - from; 2200 goto shrink; 2201 } else goto no_change; 2202 } else { 2203 cur = from; 2204 len = -len; 2205 goto utf8_shrink; 2206 } 2207 else { 2208 len -= cur - from; 2209 if (len > 0) goto grow; 2210 if (len == 0) goto no_change; 2211 len = -len; 2212 goto shrink; 2213 } 2214 break; 2215 } 2216 case '(': { 2217 tempsym_t savsym = *symptr; 2218 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags); 2219 symptr->flags |= group_modifiers; 2220 symptr->patend = savsym.grpend; 2221 symptr->level++; 2222 symptr->previous = &lookahead; 2223 while (len--) { 2224 U32 was_utf8; 2225 if (utf8) symptr->flags |= FLAG_PARSE_UTF8; 2226 else symptr->flags &= ~FLAG_PARSE_UTF8; 2227 was_utf8 = SvUTF8(cat); 2228 symptr->patptr = savsym.grpbeg; 2229 beglist = pack_rec(cat, symptr, beglist, endlist); 2230 if (SvUTF8(cat) != was_utf8) 2231 /* This had better be an upgrade while in utf8==0 mode */ 2232 utf8 = 1; 2233 2234 if (savsym.howlen == e_star && beglist == endlist) 2235 break; /* No way to continue */ 2236 } 2237 items = endlist - beglist; 2238 lookahead.flags = symptr->flags & ~group_modifiers; 2239 goto no_change; 2240 } 2241 case 'X' | TYPE_IS_SHRIEKING: 2242 if (!len) /* Avoid division by 0 */ 2243 len = 1; 2244 if (utf8) { 2245 char *hop, *last; 2246 I32 l = len; 2247 hop = last = start; 2248 while (hop < cur) { 2249 hop += UTF8SKIP(hop); 2250 if (--l == 0) { 2251 last = hop; 2252 l = len; 2253 } 2254 } 2255 if (last > cur) 2256 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); 2257 cur = last; 2258 break; 2259 } 2260 len = (cur-start) % len; 2261 /* FALL THROUGH */ 2262 case 'X': 2263 if (utf8) { 2264 if (len < 1) goto no_change; 2265 utf8_shrink: 2266 while (len > 0) { 2267 if (cur <= start) 2268 Perl_croak(aTHX_ "'%c' outside of string in pack", 2269 (int) TYPE_NO_MODIFIERS(datumtype)); 2270 while (--cur, UTF8_IS_CONTINUATION(*cur)) { 2271 if (cur <= start) 2272 Perl_croak(aTHX_ "'%c' outside of string in pack", 2273 (int) TYPE_NO_MODIFIERS(datumtype)); 2274 } 2275 len--; 2276 } 2277 } else { 2278 shrink: 2279 if (cur - start < len) 2280 Perl_croak(aTHX_ "'%c' outside of string in pack", 2281 (int) TYPE_NO_MODIFIERS(datumtype)); 2282 cur -= len; 2283 } 2284 if (cur < start+symptr->strbeg) { 2285 /* Make sure group starts don't point into the void */ 2286 tempsym_t *group; 2287 const STRLEN length = cur-start; 2288 for (group = symptr; 2289 group && length < group->strbeg; 2290 group = group->previous) group->strbeg = length; 2291 lookahead.strbeg = length; 2292 } 2293 break; 2294 case 'x' | TYPE_IS_SHRIEKING: { 2295 I32 ai32; 2296 if (!len) /* Avoid division by 0 */ 2297 len = 1; 2298 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len; 2299 else ai32 = (cur - start) % len; 2300 if (ai32 == 0) goto no_change; 2301 len -= ai32; 2302 } 2303 /* FALL THROUGH */ 2304 case 'x': 2305 goto grow; 2306 case 'A': 2307 case 'Z': 2308 case 'a': { 2309 const char *aptr; 2310 2311 fromstr = NEXTFROM; 2312 aptr = SvPV_const(fromstr, fromlen); 2313 if (DO_UTF8(fromstr)) { 2314 const char *end, *s; 2315 2316 if (!utf8 && !SvUTF8(cat)) { 2317 marked_upgrade(aTHX_ cat, symptr); 2318 lookahead.flags |= FLAG_DO_UTF8; 2319 lookahead.strbeg = symptr->strbeg; 2320 utf8 = 1; 2321 start = SvPVX(cat); 2322 cur = start + SvCUR(cat); 2323 } 2324 if (howlen == e_star) { 2325 if (utf8) goto string_copy; 2326 len = fromlen+1; 2327 } 2328 s = aptr; 2329 end = aptr + fromlen; 2330 fromlen = datumtype == 'Z' ? len-1 : len; 2331 while ((I32) fromlen > 0 && s < end) { 2332 s += UTF8SKIP(s); 2333 fromlen--; 2334 } 2335 if (s > end) 2336 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); 2337 if (utf8) { 2338 len = fromlen; 2339 if (datumtype == 'Z') len++; 2340 fromlen = s-aptr; 2341 len += fromlen; 2342 2343 goto string_copy; 2344 } 2345 fromlen = len - fromlen; 2346 if (datumtype == 'Z') fromlen--; 2347 if (howlen == e_star) { 2348 len = fromlen; 2349 if (datumtype == 'Z') len++; 2350 } 2351 GROWING(0, cat, start, cur, len); 2352 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen, 2353 datumtype | TYPE_IS_PACK)) 2354 Perl_croak(aTHX_ "panic: predicted utf8 length not available, " 2355 "for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf, 2356 (int)datumtype, aptr, end, cur, (UV)fromlen); 2357 cur += fromlen; 2358 len -= fromlen; 2359 } else if (utf8) { 2360 if (howlen == e_star) { 2361 len = fromlen; 2362 if (datumtype == 'Z') len++; 2363 } 2364 if (len <= (I32) fromlen) { 2365 fromlen = len; 2366 if (datumtype == 'Z' && fromlen > 0) fromlen--; 2367 } 2368 /* assumes a byte expands to at most UTF8_EXPAND bytes on 2369 upgrade, so: 2370 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */ 2371 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len); 2372 len -= fromlen; 2373 while (fromlen > 0) { 2374 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr); 2375 aptr++; 2376 fromlen--; 2377 } 2378 } else { 2379 string_copy: 2380 if (howlen == e_star) { 2381 len = fromlen; 2382 if (datumtype == 'Z') len++; 2383 } 2384 if (len <= (I32) fromlen) { 2385 fromlen = len; 2386 if (datumtype == 'Z' && fromlen > 0) fromlen--; 2387 } 2388 GROWING(0, cat, start, cur, len); 2389 Copy(aptr, cur, fromlen, char); 2390 cur += fromlen; 2391 len -= fromlen; 2392 } 2393 memset(cur, datumtype == 'A' ? ' ' : '\0', len); 2394 cur += len; 2395 SvTAINT(cat); 2396 break; 2397 } 2398 case 'B': 2399 case 'b': { 2400 const char *str, *end; 2401 I32 l, field_len; 2402 U8 bits; 2403 bool utf8_source; 2404 U32 utf8_flags; 2405 2406 fromstr = NEXTFROM; 2407 str = SvPV_const(fromstr, fromlen); 2408 end = str + fromlen; 2409 if (DO_UTF8(fromstr)) { 2410 utf8_source = TRUE; 2411 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY; 2412 } else { 2413 utf8_source = FALSE; 2414 utf8_flags = 0; /* Unused, but keep compilers happy */ 2415 } 2416 if (howlen == e_star) len = fromlen; 2417 field_len = (len+7)/8; 2418 GROWING(utf8, cat, start, cur, field_len); 2419 if (len > (I32)fromlen) len = fromlen; 2420 bits = 0; 2421 l = 0; 2422 if (datumtype == 'B') 2423 while (l++ < len) { 2424 if (utf8_source) { 2425 UV val = 0; 2426 NEXT_UNI_VAL(val, cur, str, end, utf8_flags); 2427 bits |= val & 1; 2428 } else bits |= *str++ & 1; 2429 if (l & 7) bits <<= 1; 2430 else { 2431 PUSH_BYTE(utf8, cur, bits); 2432 bits = 0; 2433 } 2434 } 2435 else 2436 /* datumtype == 'b' */ 2437 while (l++ < len) { 2438 if (utf8_source) { 2439 UV val = 0; 2440 NEXT_UNI_VAL(val, cur, str, end, utf8_flags); 2441 if (val & 1) bits |= 0x80; 2442 } else if (*str++ & 1) 2443 bits |= 0x80; 2444 if (l & 7) bits >>= 1; 2445 else { 2446 PUSH_BYTE(utf8, cur, bits); 2447 bits = 0; 2448 } 2449 } 2450 l--; 2451 if (l & 7) { 2452 if (datumtype == 'B') 2453 bits <<= 7 - (l & 7); 2454 else 2455 bits >>= 7 - (l & 7); 2456 PUSH_BYTE(utf8, cur, bits); 2457 l += 7; 2458 } 2459 /* Determine how many chars are left in the requested field */ 2460 l /= 8; 2461 if (howlen == e_star) field_len = 0; 2462 else field_len -= l; 2463 Zero(cur, field_len, char); 2464 cur += field_len; 2465 break; 2466 } 2467 case 'H': 2468 case 'h': { 2469 const char *str, *end; 2470 I32 l, field_len; 2471 U8 bits; 2472 bool utf8_source; 2473 U32 utf8_flags; 2474 2475 fromstr = NEXTFROM; 2476 str = SvPV_const(fromstr, fromlen); 2477 end = str + fromlen; 2478 if (DO_UTF8(fromstr)) { 2479 utf8_source = TRUE; 2480 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY; 2481 } else { 2482 utf8_source = FALSE; 2483 utf8_flags = 0; /* Unused, but keep compilers happy */ 2484 } 2485 if (howlen == e_star) len = fromlen; 2486 field_len = (len+1)/2; 2487 GROWING(utf8, cat, start, cur, field_len); 2488 if (!utf8 && len > (I32)fromlen) len = fromlen; 2489 bits = 0; 2490 l = 0; 2491 if (datumtype == 'H') 2492 while (l++ < len) { 2493 if (utf8_source) { 2494 UV val = 0; 2495 NEXT_UNI_VAL(val, cur, str, end, utf8_flags); 2496 if (val < 256 && isALPHA(val)) 2497 bits |= (val + 9) & 0xf; 2498 else 2499 bits |= val & 0xf; 2500 } else if (isALPHA(*str)) 2501 bits |= (*str++ + 9) & 0xf; 2502 else 2503 bits |= *str++ & 0xf; 2504 if (l & 1) bits <<= 4; 2505 else { 2506 PUSH_BYTE(utf8, cur, bits); 2507 bits = 0; 2508 } 2509 } 2510 else 2511 while (l++ < len) { 2512 if (utf8_source) { 2513 UV val = 0; 2514 NEXT_UNI_VAL(val, cur, str, end, utf8_flags); 2515 if (val < 256 && isALPHA(val)) 2516 bits |= ((val + 9) & 0xf) << 4; 2517 else 2518 bits |= (val & 0xf) << 4; 2519 } else if (isALPHA(*str)) 2520 bits |= ((*str++ + 9) & 0xf) << 4; 2521 else 2522 bits |= (*str++ & 0xf) << 4; 2523 if (l & 1) bits >>= 4; 2524 else { 2525 PUSH_BYTE(utf8, cur, bits); 2526 bits = 0; 2527 } 2528 } 2529 l--; 2530 if (l & 1) { 2531 PUSH_BYTE(utf8, cur, bits); 2532 l++; 2533 } 2534 /* Determine how many chars are left in the requested field */ 2535 l /= 2; 2536 if (howlen == e_star) field_len = 0; 2537 else field_len -= l; 2538 Zero(cur, field_len, char); 2539 cur += field_len; 2540 break; 2541 } 2542 case 'c': 2543 while (len-- > 0) { 2544 IV aiv; 2545 fromstr = NEXTFROM; 2546 aiv = SvIV(fromstr); 2547 if ((-128 > aiv || aiv > 127)) 2548 Perl_ck_warner(aTHX_ packWARN(WARN_PACK), 2549 "Character in 'c' format wrapped in pack"); 2550 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff)); 2551 } 2552 break; 2553 case 'C': 2554 if (len == 0) { 2555 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0; 2556 break; 2557 } 2558 while (len-- > 0) { 2559 IV aiv; 2560 fromstr = NEXTFROM; 2561 aiv = SvIV(fromstr); 2562 if ((0 > aiv || aiv > 0xff)) 2563 Perl_ck_warner(aTHX_ packWARN(WARN_PACK), 2564 "Character in 'C' format wrapped in pack"); 2565 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff)); 2566 } 2567 break; 2568 case 'W': { 2569 char *end; 2570 U8 in_bytes = (U8)IN_BYTES; 2571 2572 end = start+SvLEN(cat)-1; 2573 if (utf8) end -= UTF8_MAXLEN-1; 2574 while (len-- > 0) { 2575 UV auv; 2576 fromstr = NEXTFROM; 2577 auv = SvUV(fromstr); 2578 if (in_bytes) auv = auv % 0x100; 2579 if (utf8) { 2580 W_utf8: 2581 if (cur > end) { 2582 *cur = '\0'; 2583 SvCUR_set(cat, cur - start); 2584 2585 GROWING(0, cat, start, cur, len+UTF8_MAXLEN); 2586 end = start+SvLEN(cat)-UTF8_MAXLEN; 2587 } 2588 cur = (char *) uvchr_to_utf8_flags((U8 *) cur, 2589 auv, 2590 warn_utf8 ? 2591 0 : UNICODE_ALLOW_ANY); 2592 } else { 2593 if (auv >= 0x100) { 2594 if (!SvUTF8(cat)) { 2595 *cur = '\0'; 2596 SvCUR_set(cat, cur - start); 2597 marked_upgrade(aTHX_ cat, symptr); 2598 lookahead.flags |= FLAG_DO_UTF8; 2599 lookahead.strbeg = symptr->strbeg; 2600 utf8 = 1; 2601 start = SvPVX(cat); 2602 cur = start + SvCUR(cat); 2603 end = start+SvLEN(cat)-UTF8_MAXLEN; 2604 goto W_utf8; 2605 } 2606 Perl_ck_warner(aTHX_ packWARN(WARN_PACK), 2607 "Character in 'W' format wrapped in pack"); 2608 auv &= 0xff; 2609 } 2610 if (cur >= end) { 2611 *cur = '\0'; 2612 SvCUR_set(cat, cur - start); 2613 GROWING(0, cat, start, cur, len+1); 2614 end = start+SvLEN(cat)-1; 2615 } 2616 *(U8 *) cur++ = (U8)auv; 2617 } 2618 } 2619 break; 2620 } 2621 case 'U': { 2622 char *end; 2623 2624 if (len == 0) { 2625 if (!(symptr->flags & FLAG_DO_UTF8)) { 2626 marked_upgrade(aTHX_ cat, symptr); 2627 lookahead.flags |= FLAG_DO_UTF8; 2628 lookahead.strbeg = symptr->strbeg; 2629 } 2630 utf8 = 0; 2631 goto no_change; 2632 } 2633 2634 end = start+SvLEN(cat); 2635 if (!utf8) end -= UTF8_MAXLEN; 2636 while (len-- > 0) { 2637 UV auv; 2638 fromstr = NEXTFROM; 2639 auv = SvUV(fromstr); 2640 if (utf8) { 2641 U8 buffer[UTF8_MAXLEN], *endb; 2642 endb = uvchr_to_utf8_flags(buffer, auv, 2643 warn_utf8 ? 2644 0 : UNICODE_ALLOW_ANY); 2645 if (cur+(endb-buffer)*UTF8_EXPAND >= end) { 2646 *cur = '\0'; 2647 SvCUR_set(cat, cur - start); 2648 GROWING(0, cat, start, cur, 2649 len+(endb-buffer)*UTF8_EXPAND); 2650 end = start+SvLEN(cat); 2651 } 2652 cur = S_bytes_to_uni(buffer, endb-buffer, cur, 0); 2653 } else { 2654 if (cur >= end) { 2655 *cur = '\0'; 2656 SvCUR_set(cat, cur - start); 2657 GROWING(0, cat, start, cur, len+UTF8_MAXLEN); 2658 end = start+SvLEN(cat)-UTF8_MAXLEN; 2659 } 2660 cur = (char *) uvchr_to_utf8_flags((U8 *) cur, auv, 2661 warn_utf8 ? 2662 0 : UNICODE_ALLOW_ANY); 2663 } 2664 } 2665 break; 2666 } 2667 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */ 2668 case 'f': 2669 while (len-- > 0) { 2670 float afloat; 2671 NV anv; 2672 fromstr = NEXTFROM; 2673 anv = SvNV(fromstr); 2674 # if defined(VMS) && !defined(_IEEE_FP) 2675 /* IEEE fp overflow shenanigans are unavailable on VAX and optional 2676 * on Alpha; fake it if we don't have them. 2677 */ 2678 if (anv > FLT_MAX) 2679 afloat = FLT_MAX; 2680 else if (anv < -FLT_MAX) 2681 afloat = -FLT_MAX; 2682 else afloat = (float)anv; 2683 # else 2684 afloat = (float)anv; 2685 # endif 2686 PUSH_VAR(utf8, cur, afloat, needs_swap); 2687 } 2688 break; 2689 case 'd': 2690 while (len-- > 0) { 2691 double adouble; 2692 NV anv; 2693 fromstr = NEXTFROM; 2694 anv = SvNV(fromstr); 2695 # if defined(VMS) && !defined(_IEEE_FP) 2696 /* IEEE fp overflow shenanigans are unavailable on VAX and optional 2697 * on Alpha; fake it if we don't have them. 2698 */ 2699 if (anv > DBL_MAX) 2700 adouble = DBL_MAX; 2701 else if (anv < -DBL_MAX) 2702 adouble = -DBL_MAX; 2703 else adouble = (double)anv; 2704 # else 2705 adouble = (double)anv; 2706 # endif 2707 PUSH_VAR(utf8, cur, adouble, needs_swap); 2708 } 2709 break; 2710 case 'F': { 2711 NV_bytes anv; 2712 Zero(&anv, 1, NV); /* can be long double with unused bits */ 2713 while (len-- > 0) { 2714 fromstr = NEXTFROM; 2715 #ifdef __GNUC__ 2716 /* to work round a gcc/x86 bug; don't use SvNV */ 2717 anv.nv = sv_2nv(fromstr); 2718 #else 2719 anv.nv = SvNV(fromstr); 2720 #endif 2721 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes), needs_swap); 2722 } 2723 break; 2724 } 2725 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE) 2726 case 'D': { 2727 ld_bytes aldouble; 2728 /* long doubles can have unused bits, which may be nonzero */ 2729 Zero(&aldouble, 1, long double); 2730 while (len-- > 0) { 2731 fromstr = NEXTFROM; 2732 # ifdef __GNUC__ 2733 /* to work round a gcc/x86 bug; don't use SvNV */ 2734 aldouble.ld = (long double)sv_2nv(fromstr); 2735 # else 2736 aldouble.ld = (long double)SvNV(fromstr); 2737 # endif 2738 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes), 2739 needs_swap); 2740 } 2741 break; 2742 } 2743 #endif 2744 case 'n' | TYPE_IS_SHRIEKING: 2745 case 'n': 2746 while (len-- > 0) { 2747 I16 ai16; 2748 fromstr = NEXTFROM; 2749 ai16 = (I16)SvIV(fromstr); 2750 ai16 = PerlSock_htons(ai16); 2751 PUSH16(utf8, cur, &ai16, FALSE); 2752 } 2753 break; 2754 case 'v' | TYPE_IS_SHRIEKING: 2755 case 'v': 2756 while (len-- > 0) { 2757 I16 ai16; 2758 fromstr = NEXTFROM; 2759 ai16 = (I16)SvIV(fromstr); 2760 ai16 = htovs(ai16); 2761 PUSH16(utf8, cur, &ai16, FALSE); 2762 } 2763 break; 2764 case 'S' | TYPE_IS_SHRIEKING: 2765 #if SHORTSIZE != SIZE16 2766 while (len-- > 0) { 2767 unsigned short aushort; 2768 fromstr = NEXTFROM; 2769 aushort = SvUV(fromstr); 2770 PUSH_VAR(utf8, cur, aushort, needs_swap); 2771 } 2772 break; 2773 #else 2774 /* Fall through! */ 2775 #endif 2776 case 'S': 2777 while (len-- > 0) { 2778 U16 au16; 2779 fromstr = NEXTFROM; 2780 au16 = (U16)SvUV(fromstr); 2781 PUSH16(utf8, cur, &au16, needs_swap); 2782 } 2783 break; 2784 case 's' | TYPE_IS_SHRIEKING: 2785 #if SHORTSIZE != SIZE16 2786 while (len-- > 0) { 2787 short ashort; 2788 fromstr = NEXTFROM; 2789 ashort = SvIV(fromstr); 2790 PUSH_VAR(utf8, cur, ashort, needs_swap); 2791 } 2792 break; 2793 #else 2794 /* Fall through! */ 2795 #endif 2796 case 's': 2797 while (len-- > 0) { 2798 I16 ai16; 2799 fromstr = NEXTFROM; 2800 ai16 = (I16)SvIV(fromstr); 2801 PUSH16(utf8, cur, &ai16, needs_swap); 2802 } 2803 break; 2804 case 'I': 2805 case 'I' | TYPE_IS_SHRIEKING: 2806 while (len-- > 0) { 2807 unsigned int auint; 2808 fromstr = NEXTFROM; 2809 auint = SvUV(fromstr); 2810 PUSH_VAR(utf8, cur, auint, needs_swap); 2811 } 2812 break; 2813 case 'j': 2814 while (len-- > 0) { 2815 IV aiv; 2816 fromstr = NEXTFROM; 2817 aiv = SvIV(fromstr); 2818 PUSH_VAR(utf8, cur, aiv, needs_swap); 2819 } 2820 break; 2821 case 'J': 2822 while (len-- > 0) { 2823 UV auv; 2824 fromstr = NEXTFROM; 2825 auv = SvUV(fromstr); 2826 PUSH_VAR(utf8, cur, auv, needs_swap); 2827 } 2828 break; 2829 case 'w': 2830 while (len-- > 0) { 2831 NV anv; 2832 fromstr = NEXTFROM; 2833 anv = SvNV(fromstr); 2834 2835 if (anv < 0) { 2836 *cur = '\0'; 2837 SvCUR_set(cat, cur - start); 2838 Perl_croak(aTHX_ "Cannot compress negative numbers in pack"); 2839 } 2840 2841 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0, 2842 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as 2843 any negative IVs will have already been got by the croak() 2844 above. IOK is untrue for fractions, so we test them 2845 against UV_MAX_P1. */ 2846 if (SvIOK(fromstr) || anv < UV_MAX_P1) { 2847 char buf[(sizeof(UV)*CHAR_BIT)/7+1]; 2848 char *in = buf + sizeof(buf); 2849 UV auv = SvUV(fromstr); 2850 2851 do { 2852 *--in = (char)((auv & 0x7f) | 0x80); 2853 auv >>= 7; 2854 } while (auv); 2855 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */ 2856 PUSH_GROWING_BYTES(utf8, cat, start, cur, 2857 in, (buf + sizeof(buf)) - in); 2858 } else if (SvPOKp(fromstr)) 2859 goto w_string; 2860 else if (SvNOKp(fromstr)) { 2861 /* 10**NV_MAX_10_EXP is the largest power of 10 2862 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable 2863 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x: 2864 x = (NV_MAX_10_EXP+1) * log (10) / log (128) 2865 And with that many bytes only Inf can overflow. 2866 Some C compilers are strict about integral constant 2867 expressions so we conservatively divide by a slightly 2868 smaller integer instead of multiplying by the exact 2869 floating-point value. 2870 */ 2871 #ifdef NV_MAX_10_EXP 2872 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */ 2873 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */ 2874 #else 2875 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */ 2876 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */ 2877 #endif 2878 char *in = buf + sizeof(buf); 2879 2880 anv = Perl_floor(anv); 2881 do { 2882 const NV next = Perl_floor(anv / 128); 2883 if (in <= buf) /* this cannot happen ;-) */ 2884 Perl_croak(aTHX_ "Cannot compress integer in pack"); 2885 *--in = (unsigned char)(anv - (next * 128)) | 0x80; 2886 anv = next; 2887 } while (anv > 0); 2888 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */ 2889 PUSH_GROWING_BYTES(utf8, cat, start, cur, 2890 in, (buf + sizeof(buf)) - in); 2891 } else { 2892 const char *from; 2893 char *result, *in; 2894 SV *norm; 2895 STRLEN len; 2896 bool done; 2897 2898 w_string: 2899 /* Copy string and check for compliance */ 2900 from = SvPV_const(fromstr, len); 2901 if ((norm = is_an_int(from, len)) == NULL) 2902 Perl_croak(aTHX_ "Can only compress unsigned integers in pack"); 2903 2904 Newx(result, len, char); 2905 in = result + len; 2906 done = FALSE; 2907 while (!done) *--in = div128(norm, &done) | 0x80; 2908 result[len - 1] &= 0x7F; /* clear continue bit */ 2909 PUSH_GROWING_BYTES(utf8, cat, start, cur, 2910 in, (result + len) - in); 2911 Safefree(result); 2912 SvREFCNT_dec(norm); /* free norm */ 2913 } 2914 } 2915 break; 2916 case 'i': 2917 case 'i' | TYPE_IS_SHRIEKING: 2918 while (len-- > 0) { 2919 int aint; 2920 fromstr = NEXTFROM; 2921 aint = SvIV(fromstr); 2922 PUSH_VAR(utf8, cur, aint, needs_swap); 2923 } 2924 break; 2925 case 'N' | TYPE_IS_SHRIEKING: 2926 case 'N': 2927 while (len-- > 0) { 2928 U32 au32; 2929 fromstr = NEXTFROM; 2930 au32 = SvUV(fromstr); 2931 au32 = PerlSock_htonl(au32); 2932 PUSH32(utf8, cur, &au32, FALSE); 2933 } 2934 break; 2935 case 'V' | TYPE_IS_SHRIEKING: 2936 case 'V': 2937 while (len-- > 0) { 2938 U32 au32; 2939 fromstr = NEXTFROM; 2940 au32 = SvUV(fromstr); 2941 au32 = htovl(au32); 2942 PUSH32(utf8, cur, &au32, FALSE); 2943 } 2944 break; 2945 case 'L' | TYPE_IS_SHRIEKING: 2946 #if LONGSIZE != SIZE32 2947 while (len-- > 0) { 2948 unsigned long aulong; 2949 fromstr = NEXTFROM; 2950 aulong = SvUV(fromstr); 2951 PUSH_VAR(utf8, cur, aulong, needs_swap); 2952 } 2953 break; 2954 #else 2955 /* Fall though! */ 2956 #endif 2957 case 'L': 2958 while (len-- > 0) { 2959 U32 au32; 2960 fromstr = NEXTFROM; 2961 au32 = SvUV(fromstr); 2962 PUSH32(utf8, cur, &au32, needs_swap); 2963 } 2964 break; 2965 case 'l' | TYPE_IS_SHRIEKING: 2966 #if LONGSIZE != SIZE32 2967 while (len-- > 0) { 2968 long along; 2969 fromstr = NEXTFROM; 2970 along = SvIV(fromstr); 2971 PUSH_VAR(utf8, cur, along, needs_swap); 2972 } 2973 break; 2974 #else 2975 /* Fall though! */ 2976 #endif 2977 case 'l': 2978 while (len-- > 0) { 2979 I32 ai32; 2980 fromstr = NEXTFROM; 2981 ai32 = SvIV(fromstr); 2982 PUSH32(utf8, cur, &ai32, needs_swap); 2983 } 2984 break; 2985 #if IVSIZE >= 8 2986 case 'Q': 2987 while (len-- > 0) { 2988 Uquad_t auquad; 2989 fromstr = NEXTFROM; 2990 auquad = (Uquad_t) SvUV(fromstr); 2991 PUSH_VAR(utf8, cur, auquad, needs_swap); 2992 } 2993 break; 2994 case 'q': 2995 while (len-- > 0) { 2996 Quad_t aquad; 2997 fromstr = NEXTFROM; 2998 aquad = (Quad_t)SvIV(fromstr); 2999 PUSH_VAR(utf8, cur, aquad, needs_swap); 3000 } 3001 break; 3002 #endif 3003 case 'P': 3004 len = 1; /* assume SV is correct length */ 3005 GROWING(utf8, cat, start, cur, sizeof(char *)); 3006 /* Fall through! */ 3007 case 'p': 3008 while (len-- > 0) { 3009 const char *aptr; 3010 3011 fromstr = NEXTFROM; 3012 SvGETMAGIC(fromstr); 3013 if (!SvOK(fromstr)) aptr = NULL; 3014 else { 3015 /* XXX better yet, could spirit away the string to 3016 * a safe spot and hang on to it until the result 3017 * of pack() (and all copies of the result) are 3018 * gone. 3019 */ 3020 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) && 3021 !SvREADONLY(fromstr)))) { 3022 Perl_ck_warner(aTHX_ packWARN(WARN_PACK), 3023 "Attempt to pack pointer to temporary value"); 3024 } 3025 if (SvPOK(fromstr) || SvNIOK(fromstr)) 3026 aptr = SvPV_nomg_const_nolen(fromstr); 3027 else 3028 aptr = SvPV_force_flags_nolen(fromstr, 0); 3029 } 3030 PUSH_VAR(utf8, cur, aptr, needs_swap); 3031 } 3032 break; 3033 case 'u': { 3034 const char *aptr, *aend; 3035 bool from_utf8; 3036 3037 fromstr = NEXTFROM; 3038 if (len <= 2) len = 45; 3039 else len = len / 3 * 3; 3040 if (len >= 64) { 3041 Perl_ck_warner(aTHX_ packWARN(WARN_PACK), 3042 "Field too wide in 'u' format in pack"); 3043 len = 63; 3044 } 3045 aptr = SvPV_const(fromstr, fromlen); 3046 from_utf8 = DO_UTF8(fromstr); 3047 if (from_utf8) { 3048 aend = aptr + fromlen; 3049 fromlen = sv_len_utf8_nomg(fromstr); 3050 } else aend = NULL; /* Unused, but keep compilers happy */ 3051 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2); 3052 while (fromlen > 0) { 3053 U8 *end; 3054 I32 todo; 3055 U8 hunk[1+63/3*4+1]; 3056 3057 if ((I32)fromlen > len) 3058 todo = len; 3059 else 3060 todo = fromlen; 3061 if (from_utf8) { 3062 char buffer[64]; 3063 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo, 3064 'u' | TYPE_IS_PACK)) { 3065 *cur = '\0'; 3066 SvCUR_set(cat, cur - start); 3067 Perl_croak(aTHX_ "panic: string is shorter than advertised, " 3068 "aptr=%p, aend=%p, buffer=%p, todo=%ld", 3069 aptr, aend, buffer, (long) todo); 3070 } 3071 end = doencodes(hunk, buffer, todo); 3072 } else { 3073 end = doencodes(hunk, aptr, todo); 3074 aptr += todo; 3075 } 3076 PUSH_BYTES(utf8, cur, hunk, end-hunk, 0); 3077 fromlen -= todo; 3078 } 3079 break; 3080 } 3081 } 3082 *cur = '\0'; 3083 SvCUR_set(cat, cur - start); 3084 no_change: 3085 *symptr = lookahead; 3086 } 3087 return beglist; 3088 } 3089 #undef NEXTFROM 3090 3091 3092 PP(pp_pack) 3093 { 3094 dVAR; dSP; dMARK; dORIGMARK; dTARGET; 3095 SV *cat = TARG; 3096 STRLEN fromlen; 3097 SV *pat_sv = *++MARK; 3098 const char *pat = SvPV_const(pat_sv, fromlen); 3099 const char *patend = pat + fromlen; 3100 3101 MARK++; 3102 sv_setpvs(cat, ""); 3103 SvUTF8_off(cat); 3104 3105 packlist(cat, pat, patend, MARK, SP + 1); 3106 3107 SvSETMAGIC(cat); 3108 SP = ORIGMARK; 3109 PUSHs(cat); 3110 RETURN; 3111 } 3112 3113 /* 3114 * Local variables: 3115 * c-indentation-style: bsd 3116 * c-basic-offset: 4 3117 * indent-tabs-mode: nil 3118 * End: 3119 * 3120 * ex: set ts=8 sts=4 sw=4 et: 3121 */ 3122