1 /* 2 $Id: Encode.xs,v 2.18 2009/11/26 09:23:59 dankogai Exp dankogai $ 3 */ 4 5 #define PERL_NO_GET_CONTEXT 6 #include "EXTERN.h" 7 #include "perl.h" 8 #include "XSUB.h" 9 #define U8 U8 10 #include "encode.h" 11 12 # define PERLIO_MODNAME "PerlIO::encoding" 13 # define PERLIO_FILENAME "PerlIO/encoding.pm" 14 15 /* set 1 or more to profile. t/encoding.t dumps core because of 16 Perl_warner and PerlIO don't work well */ 17 #define ENCODE_XS_PROFILE 0 18 19 /* set 0 to disable floating point to calculate buffer size for 20 encode_method(). 1 is recommended. 2 restores NI-S original */ 21 #define ENCODE_XS_USEFP 1 22 23 #define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) {dTHX; \ 24 Perl_croak(aTHX_ "panic_unimplemented"); \ 25 return (y)0; /* fool picky compilers */ \ 26 } 27 /**/ 28 29 UNIMPLEMENTED(_encoded_utf8_to_bytes, I32) 30 UNIMPLEMENTED(_encoded_bytes_to_utf8, I32) 31 32 #define UTF8_ALLOW_STRICT 0 33 #define UTF8_ALLOW_NONSTRICT (UTF8_ALLOW_ANY & \ 34 ~(UTF8_ALLOW_CONTINUATION | \ 35 UTF8_ALLOW_NON_CONTINUATION | \ 36 UTF8_ALLOW_LONG)) 37 38 void 39 Encode_XSEncoding(pTHX_ encode_t * enc) 40 { 41 dSP; 42 HV *stash = gv_stashpv("Encode::XS", TRUE); 43 SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))), stash); 44 int i = 0; 45 PUSHMARK(sp); 46 XPUSHs(sv); 47 while (enc->name[i]) { 48 const char *name = enc->name[i++]; 49 XPUSHs(sv_2mortal(newSVpvn(name, strlen(name)))); 50 } 51 PUTBACK; 52 call_pv("Encode::define_encoding", G_DISCARD); 53 SvREFCNT_dec(sv); 54 } 55 56 void 57 call_failure(SV * routine, U8 * done, U8 * dest, U8 * orig) 58 { 59 /* Exists for breakpointing */ 60 } 61 62 63 #define ERR_ENCODE_NOMAP "\"\\x{%04" UVxf "}\" does not map to %s" 64 #define ERR_DECODE_NOMAP "%s \"\\x%02" UVXf "\" does not map to Unicode" 65 66 static SV * 67 do_fallback_cb(pTHX_ UV ch, SV *fallback_cb) 68 { 69 dSP; 70 int argc; 71 SV *retval = newSVpv("",0); 72 ENTER; 73 SAVETMPS; 74 PUSHMARK(sp); 75 XPUSHs(sv_2mortal(newSVnv((UV)ch))); 76 PUTBACK; 77 argc = call_sv(fallback_cb, G_SCALAR); 78 SPAGAIN; 79 if (argc != 1){ 80 croak("fallback sub must return scalar!"); 81 } 82 sv_catsv(retval, POPs); 83 PUTBACK; 84 FREETMPS; 85 LEAVE; 86 return retval; 87 } 88 89 static SV * 90 encode_method(pTHX_ const encode_t * enc, const encpage_t * dir, SV * src, 91 int check, STRLEN * offset, SV * term, int * retcode, 92 SV *fallback_cb) 93 { 94 STRLEN slen; 95 U8 *s = (U8 *) SvPV(src, slen); 96 STRLEN tlen = slen; 97 STRLEN ddone = 0; 98 STRLEN sdone = 0; 99 100 /* We allocate slen+1. 101 PerlIO dumps core if this value is smaller than this. */ 102 SV *dst = sv_2mortal(newSV(slen+1)); 103 U8 *d = (U8 *)SvPVX(dst); 104 STRLEN dlen = SvLEN(dst)-1; 105 int code = 0; 106 STRLEN trmlen = 0; 107 U8 *trm = term ? (U8*) SvPV(term, trmlen) : NULL; 108 109 if (offset) { 110 s += *offset; 111 if (slen > *offset){ /* safeguard against slen overflow */ 112 slen -= *offset; 113 }else{ 114 slen = 0; 115 } 116 tlen = slen; 117 } 118 119 if (slen == 0){ 120 SvCUR_set(dst, 0); 121 SvPOK_only(dst); 122 goto ENCODE_END; 123 } 124 125 while( (code = do_encode(dir, s, &slen, d, dlen, &dlen, !check, 126 trm, trmlen)) ) 127 { 128 SvCUR_set(dst, dlen+ddone); 129 SvPOK_only(dst); 130 131 if (code == ENCODE_FALLBACK || code == ENCODE_PARTIAL || 132 code == ENCODE_FOUND_TERM) { 133 break; 134 } 135 switch (code) { 136 case ENCODE_NOSPACE: 137 { 138 STRLEN more = 0; /* make sure you initialize! */ 139 STRLEN sleft; 140 sdone += slen; 141 ddone += dlen; 142 sleft = tlen - sdone; 143 #if ENCODE_XS_PROFILE >= 2 144 Perl_warn(aTHX_ 145 "more=%d, sdone=%d, sleft=%d, SvLEN(dst)=%d\n", 146 more, sdone, sleft, SvLEN(dst)); 147 #endif 148 if (sdone != 0) { /* has src ever been processed ? */ 149 #if ENCODE_XS_USEFP == 2 150 more = (1.0*tlen*SvLEN(dst)+sdone-1)/sdone 151 - SvLEN(dst); 152 #elif ENCODE_XS_USEFP 153 more = (STRLEN)((1.0*SvLEN(dst)+1)/sdone * sleft); 154 #else 155 /* safe until SvLEN(dst) == MAX_INT/16 */ 156 more = (16*SvLEN(dst)+1)/sdone/16 * sleft; 157 #endif 158 } 159 more += UTF8_MAXLEN; /* insurance policy */ 160 d = (U8 *) SvGROW(dst, SvLEN(dst) + more); 161 /* dst need to grow need MORE bytes! */ 162 if (ddone >= SvLEN(dst)) { 163 Perl_croak(aTHX_ "Destination couldn't be grown."); 164 } 165 dlen = SvLEN(dst)-ddone-1; 166 d += ddone; 167 s += slen; 168 slen = tlen-sdone; 169 continue; 170 } 171 case ENCODE_NOREP: 172 /* encoding */ 173 if (dir == enc->f_utf8) { 174 STRLEN clen; 175 UV ch = 176 utf8n_to_uvuni(s+slen, (SvCUR(src)-slen), 177 &clen, UTF8_ALLOW_ANY|UTF8_CHECK_ONLY); 178 /* if non-representable multibyte prefix at end of current buffer - break*/ 179 if (clen > tlen - sdone) break; 180 if (check & ENCODE_DIE_ON_ERR) { 181 Perl_croak(aTHX_ ERR_ENCODE_NOMAP, 182 (UV)ch, enc->name[0]); 183 return &PL_sv_undef; /* never reaches but be safe */ 184 } 185 if (check & ENCODE_WARN_ON_ERR){ 186 Perl_warner(aTHX_ packWARN(WARN_UTF8), 187 ERR_ENCODE_NOMAP, (UV)ch, enc->name[0]); 188 } 189 if (check & ENCODE_RETURN_ON_ERR){ 190 goto ENCODE_SET_SRC; 191 } 192 if (check & (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){ 193 SV* subchar = 194 (fallback_cb != &PL_sv_undef) 195 ? do_fallback_cb(aTHX_ ch, fallback_cb) 196 : newSVpvf(check & ENCODE_PERLQQ ? "\\x{%04"UVxf"}" : 197 check & ENCODE_HTMLCREF ? "&#%" UVuf ";" : 198 "&#x%" UVxf ";", (UV)ch); 199 SvUTF8_off(subchar); /* make sure no decoded string gets in */ 200 sdone += slen + clen; 201 ddone += dlen + SvCUR(subchar); 202 sv_catsv(dst, subchar); 203 SvREFCNT_dec(subchar); 204 } else { 205 /* fallback char */ 206 sdone += slen + clen; 207 ddone += dlen + enc->replen; 208 sv_catpvn(dst, (char*)enc->rep, enc->replen); 209 } 210 } 211 /* decoding */ 212 else { 213 if (check & ENCODE_DIE_ON_ERR){ 214 Perl_croak(aTHX_ ERR_DECODE_NOMAP, 215 enc->name[0], (UV)s[slen]); 216 return &PL_sv_undef; /* never reaches but be safe */ 217 } 218 if (check & ENCODE_WARN_ON_ERR){ 219 Perl_warner( 220 aTHX_ packWARN(WARN_UTF8), 221 ERR_DECODE_NOMAP, 222 enc->name[0], (UV)s[slen]); 223 } 224 if (check & ENCODE_RETURN_ON_ERR){ 225 goto ENCODE_SET_SRC; 226 } 227 if (check & 228 (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){ 229 SV* subchar = 230 (fallback_cb != &PL_sv_undef) 231 ? do_fallback_cb(aTHX_ (UV)s[slen], fallback_cb) 232 : newSVpvf("\\x%02" UVXf, (UV)s[slen]); 233 sdone += slen + 1; 234 ddone += dlen + SvCUR(subchar); 235 sv_catsv(dst, subchar); 236 SvREFCNT_dec(subchar); 237 } else { 238 sdone += slen + 1; 239 ddone += dlen + strlen(FBCHAR_UTF8); 240 sv_catpv(dst, FBCHAR_UTF8); 241 } 242 } 243 /* settle variables when fallback */ 244 d = (U8 *)SvEND(dst); 245 dlen = SvLEN(dst) - ddone - 1; 246 s = (U8*)SvPVX(src) + sdone; 247 slen = tlen - sdone; 248 break; 249 250 default: 251 Perl_croak(aTHX_ "Unexpected code %d converting %s %s", 252 code, (dir == enc->f_utf8) ? "to" : "from", 253 enc->name[0]); 254 return &PL_sv_undef; 255 } 256 } 257 ENCODE_SET_SRC: 258 if (check && !(check & ENCODE_LEAVE_SRC)){ 259 sdone = SvCUR(src) - (slen+sdone); 260 if (sdone) { 261 sv_setpvn(src, (char*)s+slen, sdone); 262 } 263 SvCUR_set(src, sdone); 264 } 265 /* warn("check = 0x%X, code = 0x%d\n", check, code); */ 266 267 SvCUR_set(dst, dlen+ddone); 268 SvPOK_only(dst); 269 270 #if ENCODE_XS_PROFILE 271 if (SvCUR(dst) > SvCUR(src)){ 272 Perl_warn(aTHX_ 273 "SvLEN(dst)=%d, SvCUR(dst)=%d. %d bytes unused(%f %%)\n", 274 SvLEN(dst), SvCUR(dst), SvLEN(dst) - SvCUR(dst), 275 (SvLEN(dst) - SvCUR(dst))*1.0/SvLEN(dst)*100.0); 276 } 277 #endif 278 279 if (offset) 280 *offset += sdone + slen; 281 282 ENCODE_END: 283 *SvEND(dst) = '\0'; 284 if (retcode) *retcode = code; 285 return dst; 286 } 287 288 static bool 289 strict_utf8(pTHX_ SV* sv) 290 { 291 HV* hv; 292 SV** svp; 293 sv = SvRV(sv); 294 if (!sv || SvTYPE(sv) != SVt_PVHV) 295 return 0; 296 hv = (HV*)sv; 297 svp = hv_fetch(hv, "strict_utf8", 11, 0); 298 if (!svp) 299 return 0; 300 return SvTRUE(*svp); 301 } 302 303 static U8* 304 process_utf8(pTHX_ SV* dst, U8* s, U8* e, SV *check_sv, 305 bool encode, bool strict, bool stop_at_partial) 306 { 307 UV uv; 308 STRLEN ulen; 309 SV *fallback_cb; 310 int check; 311 312 if (SvROK(check_sv)) { 313 /* croak("UTF-8 decoder doesn't support callback CHECK"); */ 314 fallback_cb = check_sv; 315 check = ENCODE_PERLQQ|ENCODE_LEAVE_SRC; /* same as perlqq */ 316 } 317 else { 318 fallback_cb = &PL_sv_undef; 319 check = SvIV(check_sv); 320 } 321 322 SvPOK_only(dst); 323 SvCUR_set(dst,0); 324 325 while (s < e) { 326 if (UTF8_IS_INVARIANT(*s)) { 327 sv_catpvn(dst, (char *)s, 1); 328 s++; 329 continue; 330 } 331 332 if (UTF8_IS_START(*s)) { 333 U8 skip = UTF8SKIP(s); 334 if ((s + skip) > e) { 335 /* Partial character */ 336 /* XXX could check that rest of bytes are UTF8_IS_CONTINUATION(ch) */ 337 if (stop_at_partial || (check & ENCODE_STOP_AT_PARTIAL)) 338 break; 339 340 goto malformed_byte; 341 } 342 343 uv = utf8n_to_uvuni(s, e - s, &ulen, 344 UTF8_CHECK_ONLY | (strict ? UTF8_ALLOW_STRICT : 345 UTF8_ALLOW_NONSTRICT) 346 ); 347 #if 1 /* perl-5.8.6 and older do not check UTF8_ALLOW_LONG */ 348 if (strict && uv > PERL_UNICODE_MAX) 349 ulen = (STRLEN) -1; 350 #endif 351 if (ulen == -1) { 352 if (strict) { 353 uv = utf8n_to_uvuni(s, e - s, &ulen, 354 UTF8_CHECK_ONLY | UTF8_ALLOW_NONSTRICT); 355 if (ulen == -1) 356 goto malformed_byte; 357 goto malformed; 358 } 359 goto malformed_byte; 360 } 361 362 363 /* Whole char is good */ 364 sv_catpvn(dst,(char *)s,skip); 365 s += skip; 366 continue; 367 } 368 369 /* If we get here there is something wrong with alleged UTF-8 */ 370 malformed_byte: 371 uv = (UV)*s; 372 ulen = 1; 373 374 malformed: 375 if (check & ENCODE_DIE_ON_ERR){ 376 if (encode) 377 Perl_croak(aTHX_ ERR_ENCODE_NOMAP, uv, "utf8"); 378 else 379 Perl_croak(aTHX_ ERR_DECODE_NOMAP, "utf8", uv); 380 } 381 if (check & ENCODE_WARN_ON_ERR){ 382 if (encode) 383 Perl_warner(aTHX_ packWARN(WARN_UTF8), 384 ERR_ENCODE_NOMAP, uv, "utf8"); 385 else 386 Perl_warner(aTHX_ packWARN(WARN_UTF8), 387 ERR_DECODE_NOMAP, "utf8", uv); 388 } 389 if (check & ENCODE_RETURN_ON_ERR) { 390 break; 391 } 392 if (check & (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){ 393 SV* subchar = 394 (fallback_cb != &PL_sv_undef) 395 ? do_fallback_cb(aTHX_ uv, fallback_cb) 396 : newSVpvf(check & ENCODE_PERLQQ 397 ? (ulen == 1 ? "\\x%02" UVXf : "\\x{%04" UVXf "}") 398 : check & ENCODE_HTMLCREF ? "&#%" UVuf ";" 399 : "&#x%" UVxf ";", uv); 400 if (encode){ 401 SvUTF8_off(subchar); /* make sure no decoded string gets in */ 402 } 403 sv_catsv(dst, subchar); 404 SvREFCNT_dec(subchar); 405 } else { 406 sv_catpv(dst, FBCHAR_UTF8); 407 } 408 s += ulen; 409 } 410 *SvEND(dst) = '\0'; 411 412 return s; 413 } 414 415 416 MODULE = Encode PACKAGE = Encode::utf8 PREFIX = Method_ 417 418 PROTOTYPES: DISABLE 419 420 void 421 Method_decode_xs(obj,src,check_sv = &PL_sv_no) 422 SV * obj 423 SV * src 424 SV * check_sv 425 PREINIT: 426 STRLEN slen; 427 U8 *s; 428 U8 *e; 429 SV *dst; 430 bool renewed = 0; 431 int check; 432 CODE: 433 { 434 dSP; ENTER; SAVETMPS; 435 if (src == &PL_sv_undef) src = newSV(0); 436 s = (U8 *) SvPV(src, slen); 437 e = (U8 *) SvEND(src); 438 dst = newSV(slen>0?slen:1); /* newSV() abhors 0 -- inaba */ 439 check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV(check_sv); 440 /* 441 * PerlIO check -- we assume the object is of PerlIO if renewed 442 */ 443 PUSHMARK(sp); 444 XPUSHs(obj); 445 PUTBACK; 446 if (call_method("renewed",G_SCALAR) == 1) { 447 SPAGAIN; 448 renewed = (bool)POPi; 449 PUTBACK; 450 #if 0 451 fprintf(stderr, "renewed == %d\n", renewed); 452 #endif 453 } 454 FREETMPS; LEAVE; 455 /* end PerlIO check */ 456 457 if (SvUTF8(src)) { 458 s = utf8_to_bytes(s,&slen); 459 if (s) { 460 SvCUR_set(src,slen); 461 SvUTF8_off(src); 462 e = s+slen; 463 } 464 else { 465 croak("Cannot decode string with wide characters"); 466 } 467 } 468 469 s = process_utf8(aTHX_ dst, s, e, check_sv, 0, strict_utf8(aTHX_ obj), renewed); 470 471 /* Clear out translated part of source unless asked not to */ 472 if (check && !(check & ENCODE_LEAVE_SRC)){ 473 slen = e-s; 474 if (slen) { 475 sv_setpvn(src, (char*)s, slen); 476 } 477 SvCUR_set(src, slen); 478 } 479 SvUTF8_on(dst); 480 ST(0) = sv_2mortal(dst); 481 XSRETURN(1); 482 } 483 484 void 485 Method_encode_xs(obj,src,check_sv = &PL_sv_no) 486 SV * obj 487 SV * src 488 SV * check_sv 489 PREINIT: 490 STRLEN slen; 491 U8 *s; 492 U8 *e; 493 SV *dst; 494 bool renewed = 0; 495 int check; 496 CODE: 497 { 498 check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV(check_sv); 499 if (src == &PL_sv_undef) src = newSV(0); 500 s = (U8 *) SvPV(src, slen); 501 e = (U8 *) SvEND(src); 502 dst = newSV(slen>0?slen:1); /* newSV() abhors 0 -- inaba */ 503 if (SvUTF8(src)) { 504 /* Already encoded */ 505 if (strict_utf8(aTHX_ obj)) { 506 s = process_utf8(aTHX_ dst, s, e, check_sv, 1, 1, 0); 507 } 508 else { 509 /* trust it and just copy the octets */ 510 sv_setpvn(dst,(char *)s,(e-s)); 511 s = e; 512 } 513 } 514 else { 515 /* Native bytes - can always encode */ 516 U8 *d = (U8 *) SvGROW(dst, 2*slen+1); /* +1 or assertion will botch */ 517 while (s < e) { 518 UV uv = NATIVE_TO_UNI((UV) *s); 519 s++; /* Above expansion of NATIVE_TO_UNI() is safer this way. */ 520 if (UNI_IS_INVARIANT(uv)) 521 *d++ = (U8)UTF_TO_NATIVE(uv); 522 else { 523 *d++ = (U8)UTF8_EIGHT_BIT_HI(uv); 524 *d++ = (U8)UTF8_EIGHT_BIT_LO(uv); 525 } 526 } 527 SvCUR_set(dst, d- (U8 *)SvPVX(dst)); 528 *SvEND(dst) = '\0'; 529 } 530 531 /* Clear out translated part of source unless asked not to */ 532 if (check && !(check & ENCODE_LEAVE_SRC)){ 533 slen = e-s; 534 if (slen) { 535 sv_setpvn(src, (char*)s, slen); 536 } 537 SvCUR_set(src, slen); 538 } 539 SvPOK_only(dst); 540 SvUTF8_off(dst); 541 ST(0) = sv_2mortal(dst); 542 XSRETURN(1); 543 } 544 545 MODULE = Encode PACKAGE = Encode::XS PREFIX = Method_ 546 547 PROTOTYPES: ENABLE 548 549 void 550 Method_renew(obj) 551 SV * obj 552 CODE: 553 { 554 XSRETURN(1); 555 } 556 557 int 558 Method_renewed(obj) 559 SV * obj 560 CODE: 561 RETVAL = 0; 562 OUTPUT: 563 RETVAL 564 565 void 566 Method_name(obj) 567 SV * obj 568 CODE: 569 { 570 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); 571 ST(0) = sv_2mortal(newSVpvn(enc->name[0],strlen(enc->name[0]))); 572 XSRETURN(1); 573 } 574 575 void 576 Method_cat_decode(obj, dst, src, off, term, check_sv = &PL_sv_no) 577 SV * obj 578 SV * dst 579 SV * src 580 SV * off 581 SV * term 582 SV * check_sv 583 CODE: 584 { 585 int check; 586 SV *fallback_cb = &PL_sv_undef; 587 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); 588 STRLEN offset = (STRLEN)SvIV(off); 589 int code = 0; 590 if (SvUTF8(src)) { 591 sv_utf8_downgrade(src, FALSE); 592 } 593 if (SvROK(check_sv)){ 594 fallback_cb = check_sv; 595 check = ENCODE_PERLQQ|ENCODE_LEAVE_SRC; /* same as FB_PERLQQ */ 596 }else{ 597 check = SvIV(check_sv); 598 } 599 sv_catsv(dst, encode_method(aTHX_ enc, enc->t_utf8, src, check, 600 &offset, term, &code, fallback_cb)); 601 SvIV_set(off, (IV)offset); 602 if (code == ENCODE_FOUND_TERM) { 603 ST(0) = &PL_sv_yes; 604 }else{ 605 ST(0) = &PL_sv_no; 606 } 607 XSRETURN(1); 608 } 609 610 void 611 Method_decode(obj,src,check_sv = &PL_sv_no) 612 SV * obj 613 SV * src 614 SV * check_sv 615 CODE: 616 { 617 int check; 618 SV *fallback_cb = &PL_sv_undef; 619 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); 620 if (SvUTF8(src)) { 621 sv_utf8_downgrade(src, FALSE); 622 } 623 if (SvROK(check_sv)){ 624 fallback_cb = check_sv; 625 check = ENCODE_PERLQQ|ENCODE_LEAVE_SRC; /* same as FB_PERLQQ */ 626 }else{ 627 check = SvIV(check_sv); 628 } 629 ST(0) = encode_method(aTHX_ enc, enc->t_utf8, src, check, 630 NULL, Nullsv, NULL, fallback_cb); 631 SvUTF8_on(ST(0)); 632 XSRETURN(1); 633 } 634 635 void 636 Method_encode(obj,src,check_sv = &PL_sv_no) 637 SV * obj 638 SV * src 639 SV * check_sv 640 CODE: 641 { 642 int check; 643 SV *fallback_cb = &PL_sv_undef; 644 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); 645 sv_utf8_upgrade(src); 646 if (SvROK(check_sv)){ 647 fallback_cb = check_sv; 648 check = ENCODE_PERLQQ|ENCODE_LEAVE_SRC; /* same as FB_PERLQQ */ 649 }else{ 650 check = SvIV(check_sv); 651 } 652 ST(0) = encode_method(aTHX_ enc, enc->f_utf8, src, check, 653 NULL, Nullsv, NULL, fallback_cb); 654 XSRETURN(1); 655 } 656 657 void 658 Method_needs_lines(obj) 659 SV * obj 660 CODE: 661 { 662 /* encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); */ 663 ST(0) = &PL_sv_no; 664 XSRETURN(1); 665 } 666 667 void 668 Method_perlio_ok(obj) 669 SV * obj 670 CODE: 671 { 672 /* encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); */ 673 /* require_pv(PERLIO_FILENAME); */ 674 675 eval_pv("require PerlIO::encoding", 0); 676 677 if (SvTRUE(get_sv("@", 0))) { 678 ST(0) = &PL_sv_no; 679 }else{ 680 ST(0) = &PL_sv_yes; 681 } 682 XSRETURN(1); 683 } 684 685 void 686 Method_mime_name(obj) 687 SV * obj 688 CODE: 689 { 690 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); 691 SV *retval; 692 eval_pv("require Encode::MIME::Name", 0); 693 694 if (SvTRUE(get_sv("@", 0))) { 695 ST(0) = &PL_sv_undef; 696 }else{ 697 ENTER; 698 SAVETMPS; 699 PUSHMARK(sp); 700 XPUSHs(sv_2mortal(newSVpvn(enc->name[0], strlen(enc->name[0])))); 701 PUTBACK; 702 call_pv("Encode::MIME::Name::get_mime_name", G_SCALAR); 703 SPAGAIN; 704 retval = newSVsv(POPs); 705 PUTBACK; 706 FREETMPS; 707 LEAVE; 708 /* enc->name[0] */ 709 ST(0) = retval; 710 } 711 XSRETURN(1); 712 } 713 714 MODULE = Encode PACKAGE = Encode 715 716 PROTOTYPES: ENABLE 717 718 I32 719 _bytes_to_utf8(sv, ...) 720 SV * sv 721 CODE: 722 { 723 SV * encoding = items == 2 ? ST(1) : Nullsv; 724 725 if (encoding) 726 RETVAL = _encoded_bytes_to_utf8(sv, SvPV_nolen(encoding)); 727 else { 728 STRLEN len; 729 U8* s = (U8*)SvPV(sv, len); 730 U8* converted; 731 732 converted = bytes_to_utf8(s, &len); /* This allocs */ 733 sv_setpvn(sv, (char *)converted, len); 734 SvUTF8_on(sv); /* XXX Should we? */ 735 Safefree(converted); /* ... so free it */ 736 RETVAL = len; 737 } 738 } 739 OUTPUT: 740 RETVAL 741 742 I32 743 _utf8_to_bytes(sv, ...) 744 SV * sv 745 CODE: 746 { 747 SV * to = items > 1 ? ST(1) : Nullsv; 748 SV * check = items > 2 ? ST(2) : Nullsv; 749 750 if (to) { 751 RETVAL = _encoded_utf8_to_bytes(sv, SvPV_nolen(to)); 752 } else { 753 STRLEN len; 754 U8 *s = (U8*)SvPV(sv, len); 755 756 RETVAL = 0; 757 if (SvTRUE(check)) { 758 /* Must do things the slow way */ 759 U8 *dest; 760 /* We need a copy to pass to check() */ 761 U8 *src = s; 762 U8 *send = s + len; 763 U8 *d0; 764 765 New(83, dest, len, U8); /* I think */ 766 d0 = dest; 767 768 while (s < send) { 769 if (*s < 0x80){ 770 *dest++ = *s++; 771 } else { 772 STRLEN ulen; 773 UV uv = *s++; 774 775 /* Have to do it all ourselves because of error routine, 776 aargh. */ 777 if (!(uv & 0x40)){ goto failure; } 778 if (!(uv & 0x20)) { ulen = 2; uv &= 0x1f; } 779 else if (!(uv & 0x10)) { ulen = 3; uv &= 0x0f; } 780 else if (!(uv & 0x08)) { ulen = 4; uv &= 0x07; } 781 else if (!(uv & 0x04)) { ulen = 5; uv &= 0x03; } 782 else if (!(uv & 0x02)) { ulen = 6; uv &= 0x01; } 783 else if (!(uv & 0x01)) { ulen = 7; uv = 0; } 784 else { ulen = 13; uv = 0; } 785 786 /* Note change to utf8.c variable naming, for variety */ 787 while (ulen--) { 788 if ((*s & 0xc0) != 0x80){ 789 goto failure; 790 } else { 791 uv = (uv << 6) | (*s++ & 0x3f); 792 } 793 } 794 if (uv > 256) { 795 failure: 796 call_failure(check, s, dest, src); 797 /* Now what happens? */ 798 } 799 *dest++ = (U8)uv; 800 } 801 } 802 RETVAL = dest - d0; 803 sv_usepvn(sv, (char *)dest, RETVAL); 804 SvUTF8_off(sv); 805 } else { 806 RETVAL = (utf8_to_bytes(s, &len) ? len : 0); 807 } 808 } 809 } 810 OUTPUT: 811 RETVAL 812 813 bool 814 is_utf8(sv, check = 0) 815 SV * sv 816 int check 817 CODE: 818 { 819 if (SvGMAGICAL(sv)) /* it could be $1, for example */ 820 sv = newSVsv(sv); /* GMAGIG will be done */ 821 RETVAL = SvUTF8(sv) ? TRUE : FALSE; 822 if (RETVAL && 823 check && 824 !is_utf8_string((U8*)SvPVX(sv), SvCUR(sv))) 825 RETVAL = FALSE; 826 if (sv != ST(0)) 827 SvREFCNT_dec(sv); /* it was a temp copy */ 828 } 829 OUTPUT: 830 RETVAL 831 832 SV * 833 _utf8_on(sv) 834 SV * sv 835 CODE: 836 { 837 if (SvPOK(sv)) { 838 SV *rsv = newSViv(SvUTF8(sv)); 839 RETVAL = rsv; 840 SvUTF8_on(sv); 841 } else { 842 RETVAL = &PL_sv_undef; 843 } 844 } 845 OUTPUT: 846 RETVAL 847 848 SV * 849 _utf8_off(sv) 850 SV * sv 851 CODE: 852 { 853 if (SvPOK(sv)) { 854 SV *rsv = newSViv(SvUTF8(sv)); 855 RETVAL = rsv; 856 SvUTF8_off(sv); 857 } else { 858 RETVAL = &PL_sv_undef; 859 } 860 } 861 OUTPUT: 862 RETVAL 863 864 int 865 DIE_ON_ERR() 866 CODE: 867 RETVAL = ENCODE_DIE_ON_ERR; 868 OUTPUT: 869 RETVAL 870 871 int 872 WARN_ON_ERR() 873 CODE: 874 RETVAL = ENCODE_WARN_ON_ERR; 875 OUTPUT: 876 RETVAL 877 878 int 879 LEAVE_SRC() 880 CODE: 881 RETVAL = ENCODE_LEAVE_SRC; 882 OUTPUT: 883 RETVAL 884 885 int 886 RETURN_ON_ERR() 887 CODE: 888 RETVAL = ENCODE_RETURN_ON_ERR; 889 OUTPUT: 890 RETVAL 891 892 int 893 PERLQQ() 894 CODE: 895 RETVAL = ENCODE_PERLQQ; 896 OUTPUT: 897 RETVAL 898 899 int 900 HTMLCREF() 901 CODE: 902 RETVAL = ENCODE_HTMLCREF; 903 OUTPUT: 904 RETVAL 905 906 int 907 XMLCREF() 908 CODE: 909 RETVAL = ENCODE_XMLCREF; 910 OUTPUT: 911 RETVAL 912 913 int 914 STOP_AT_PARTIAL() 915 CODE: 916 RETVAL = ENCODE_STOP_AT_PARTIAL; 917 OUTPUT: 918 RETVAL 919 920 int 921 FB_DEFAULT() 922 CODE: 923 RETVAL = ENCODE_FB_DEFAULT; 924 OUTPUT: 925 RETVAL 926 927 int 928 FB_CROAK() 929 CODE: 930 RETVAL = ENCODE_FB_CROAK; 931 OUTPUT: 932 RETVAL 933 934 int 935 FB_QUIET() 936 CODE: 937 RETVAL = ENCODE_FB_QUIET; 938 OUTPUT: 939 RETVAL 940 941 int 942 FB_WARN() 943 CODE: 944 RETVAL = ENCODE_FB_WARN; 945 OUTPUT: 946 RETVAL 947 948 int 949 FB_PERLQQ() 950 CODE: 951 RETVAL = ENCODE_FB_PERLQQ; 952 OUTPUT: 953 RETVAL 954 955 int 956 FB_HTMLCREF() 957 CODE: 958 RETVAL = ENCODE_FB_HTMLCREF; 959 OUTPUT: 960 RETVAL 961 962 int 963 FB_XMLCREF() 964 CODE: 965 RETVAL = ENCODE_FB_XMLCREF; 966 OUTPUT: 967 RETVAL 968 969 BOOT: 970 { 971 #include "def_t.h" 972 #include "def_t.exh" 973 } 974