xref: /openbsd/gnu/usr.bin/perl/doop.c (revision 404b540a)
1 /*    doop.c
2  *
3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4  *    2001, 2002, 2004, 2005, 2006, 2007, 2008, 2009 by Larry Wall and others
5  *
6  *    You may distribute under the terms of either the GNU General Public
7  *    License or the Artistic License, as specified in the README file.
8  *
9  */
10 
11 /*
12  *  'So that was the job I felt I had to do when I started,' thought Sam.
13  *
14  *     [p.934 of _The Lord of the Rings_, VI/iii: "Mount Doom"]
15  */
16 
17 /* This file contains some common functions needed to carry out certain
18  * ops. For example both pp_schomp() and pp_chomp() - scalar and array
19  * chomp operations - call the function do_chomp() found in this file.
20  */
21 
22 #include "EXTERN.h"
23 #define PERL_IN_DOOP_C
24 #include "perl.h"
25 
26 #ifndef PERL_MICRO
27 #include <signal.h>
28 #endif
29 
30 STATIC I32
31 S_do_trans_simple(pTHX_ SV * const sv)
32 {
33     dVAR;
34     I32 matches = 0;
35     STRLEN len;
36     U8 *s = (U8*)SvPV(sv,len);
37     U8 * const send = s+len;
38     const short * const tbl = (short*)cPVOP->op_pv;
39 
40     PERL_ARGS_ASSERT_DO_TRANS_SIMPLE;
41 
42     if (!tbl)
43 	Perl_croak(aTHX_ "panic: do_trans_simple line %d",__LINE__);
44 
45     /* First, take care of non-UTF-8 input strings, because they're easy */
46     if (!SvUTF8(sv)) {
47 	while (s < send) {
48 	    const I32 ch = tbl[*s];
49 	    if (ch >= 0) {
50 		matches++;
51 		*s = (U8)ch;
52 	    }
53 	    s++;
54 	}
55 	SvSETMAGIC(sv);
56     }
57     else {
58 	const I32 grows = PL_op->op_private & OPpTRANS_GROWS;
59 	U8 *d;
60 	U8 *dstart;
61 
62 	/* Allow for expansion: $_="a".chr(400); tr/a/\xFE/, FE needs encoding */
63 	if (grows)
64 	    Newx(d, len*2+1, U8);
65 	else
66 	    d = s;
67 	dstart = d;
68 	while (s < send) {
69 	    STRLEN ulen;
70 	    I32 ch;
71 
72 	    /* Need to check this, otherwise 128..255 won't match */
73 	    const UV c = utf8n_to_uvchr(s, send - s, &ulen, UTF8_ALLOW_DEFAULT);
74 	    if (c < 0x100 && (ch = tbl[c]) >= 0) {
75 		matches++;
76 		d = uvchr_to_utf8(d, ch);
77 		s += ulen;
78 	    }
79 	    else { /* No match -> copy */
80 		Move(s, d, ulen, U8);
81 		d += ulen;
82 		s += ulen;
83 	    }
84 	}
85 	if (grows) {
86 	    sv_setpvn(sv, (char*)dstart, d - dstart);
87 	    Safefree(dstart);
88 	}
89 	else {
90 	    *d = '\0';
91 	    SvCUR_set(sv, d - dstart);
92 	}
93 	SvUTF8_on(sv);
94 	SvSETMAGIC(sv);
95     }
96     return matches;
97 }
98 
99 STATIC I32
100 S_do_trans_count(pTHX_ SV * const sv)
101 {
102     dVAR;
103     STRLEN len;
104     const U8 *s = (const U8*)SvPV_const(sv, len);
105     const U8 * const send = s + len;
106     I32 matches = 0;
107     const short * const tbl = (short*)cPVOP->op_pv;
108 
109     PERL_ARGS_ASSERT_DO_TRANS_COUNT;
110 
111     if (!tbl)
112 	Perl_croak(aTHX_ "panic: do_trans_count line %d",__LINE__);
113 
114     if (!SvUTF8(sv)) {
115 	while (s < send) {
116             if (tbl[*s++] >= 0)
117                 matches++;
118 	}
119     }
120     else {
121 	const I32 complement = PL_op->op_private & OPpTRANS_COMPLEMENT;
122 	while (s < send) {
123 	    STRLEN ulen;
124 	    const UV c = utf8n_to_uvchr(s, send - s, &ulen, UTF8_ALLOW_DEFAULT);
125 	    if (c < 0x100) {
126 		if (tbl[c] >= 0)
127 		    matches++;
128 	    } else if (complement)
129 		matches++;
130 	    s += ulen;
131 	}
132     }
133 
134     return matches;
135 }
136 
137 STATIC I32
138 S_do_trans_complex(pTHX_ SV * const sv)
139 {
140     dVAR;
141     STRLEN len;
142     U8 *s = (U8*)SvPV(sv, len);
143     U8 * const send = s+len;
144     I32 matches = 0;
145     const short * const tbl = (short*)cPVOP->op_pv;
146 
147     PERL_ARGS_ASSERT_DO_TRANS_COMPLEX;
148 
149     if (!tbl)
150 	Perl_croak(aTHX_ "panic: do_trans_complex line %d",__LINE__);
151 
152     if (!SvUTF8(sv)) {
153 	U8 *d = s;
154 	U8 * const dstart = d;
155 
156 	if (PL_op->op_private & OPpTRANS_SQUASH) {
157 	    const U8* p = send;
158 	    while (s < send) {
159 		const I32 ch = tbl[*s];
160 		if (ch >= 0) {
161 		    *d = (U8)ch;
162 		    matches++;
163 		    if (p != d - 1 || *p != *d)
164 			p = d++;
165 		}
166 		else if (ch == -1)	/* -1 is unmapped character */
167 		    *d++ = *s;
168 		else if (ch == -2)	/* -2 is delete character */
169 		    matches++;
170 		s++;
171 	    }
172 	}
173 	else {
174 	    while (s < send) {
175 		const I32 ch = tbl[*s];
176 		if (ch >= 0) {
177 		    matches++;
178 		    *d++ = (U8)ch;
179 		}
180 		else if (ch == -1)	/* -1 is unmapped character */
181 		    *d++ = *s;
182 		else if (ch == -2)      /* -2 is delete character */
183 		    matches++;
184 		s++;
185 	    }
186 	}
187 	*d = '\0';
188 	SvCUR_set(sv, d - dstart);
189     }
190     else { /* is utf8 */
191 	const I32 complement = PL_op->op_private & OPpTRANS_COMPLEMENT;
192 	const I32 grows = PL_op->op_private & OPpTRANS_GROWS;
193 	const I32 del = PL_op->op_private & OPpTRANS_DELETE;
194 	U8 *d;
195 	U8 *dstart;
196 	STRLEN rlen = 0;
197 
198 	if (grows)
199 	    Newx(d, len*2+1, U8);
200 	else
201 	    d = s;
202 	dstart = d;
203 	if (complement && !del)
204 	    rlen = tbl[0x100];
205 
206 #ifdef MACOS_TRADITIONAL
207 #define comp CoMP   /* "comp" is a keyword in some compilers ... */
208 #endif
209 
210 	if (PL_op->op_private & OPpTRANS_SQUASH) {
211 	    UV pch = 0xfeedface;
212 	    while (s < send) {
213 		STRLEN len;
214 		const UV comp = utf8n_to_uvchr(s, send - s, &len,
215 					       UTF8_ALLOW_DEFAULT);
216 		I32 ch;
217 
218 		if (comp > 0xff) {
219 		    if (!complement) {
220 			Move(s, d, len, U8);
221 			d += len;
222 		    }
223 		    else {
224 			matches++;
225 			if (!del) {
226 			    ch = (rlen == 0) ? (I32)comp :
227 				(comp - 0x100 < rlen) ?
228 				tbl[comp+1] : tbl[0x100+rlen];
229 			    if ((UV)ch != pch) {
230 				d = uvchr_to_utf8(d, ch);
231 				pch = (UV)ch;
232 			    }
233 			    s += len;
234 			    continue;
235 			}
236 		    }
237 		}
238 		else if ((ch = tbl[comp]) >= 0) {
239 		    matches++;
240 		    if ((UV)ch != pch) {
241 		        d = uvchr_to_utf8(d, ch);
242 		        pch = (UV)ch;
243 		    }
244 		    s += len;
245 		    continue;
246 		}
247 		else if (ch == -1) {	/* -1 is unmapped character */
248 		    Move(s, d, len, U8);
249 		    d += len;
250 		}
251 		else if (ch == -2)      /* -2 is delete character */
252 		    matches++;
253 		s += len;
254 		pch = 0xfeedface;
255 	    }
256 	}
257 	else {
258 	    while (s < send) {
259 		STRLEN len;
260 		const UV comp = utf8n_to_uvchr(s, send - s, &len,
261 					       UTF8_ALLOW_DEFAULT);
262 		I32 ch;
263 		if (comp > 0xff) {
264 		    if (!complement) {
265 			Move(s, d, len, U8);
266 			d += len;
267 		    }
268 		    else {
269 			matches++;
270 			if (!del) {
271 			    if (comp - 0x100 < rlen)
272 				d = uvchr_to_utf8(d, tbl[comp+1]);
273 			    else
274 				d = uvchr_to_utf8(d, tbl[0x100+rlen]);
275 			}
276 		    }
277 		}
278 		else if ((ch = tbl[comp]) >= 0) {
279 		    d = uvchr_to_utf8(d, ch);
280 		    matches++;
281 		}
282 		else if (ch == -1) {	/* -1 is unmapped character */
283 		    Move(s, d, len, U8);
284 		    d += len;
285 		}
286 		else if (ch == -2)      /* -2 is delete character */
287 		    matches++;
288 		s += len;
289 	    }
290 	}
291 	if (grows) {
292 	    sv_setpvn(sv, (char*)dstart, d - dstart);
293 	    Safefree(dstart);
294 	}
295 	else {
296 	    *d = '\0';
297 	    SvCUR_set(sv, d - dstart);
298 	}
299 	SvUTF8_on(sv);
300     }
301     SvSETMAGIC(sv);
302     return matches;
303 }
304 
305 STATIC I32
306 S_do_trans_simple_utf8(pTHX_ SV * const sv)
307 {
308     dVAR;
309     U8 *s;
310     U8 *send;
311     U8 *d;
312     U8 *start;
313     U8 *dstart, *dend;
314     I32 matches = 0;
315     const I32 grows = PL_op->op_private & OPpTRANS_GROWS;
316     STRLEN len;
317     SV* const  rv =
318 #ifdef USE_ITHREADS
319 		    PAD_SVl(cPADOP->op_padix);
320 #else
321 		    MUTABLE_SV(cSVOP->op_sv);
322 #endif
323     HV* const  hv = MUTABLE_HV(SvRV(rv));
324     SV* const * svp = hv_fetchs(hv, "NONE", FALSE);
325     const UV none = svp ? SvUV(*svp) : 0x7fffffff;
326     const UV extra = none + 1;
327     UV final = 0;
328     U8 hibit = 0;
329 
330     PERL_ARGS_ASSERT_DO_TRANS_SIMPLE_UTF8;
331 
332     s = (U8*)SvPV(sv, len);
333     if (!SvUTF8(sv)) {
334 	const U8 *t = s;
335 	const U8 * const e = s + len;
336 	while (t < e) {
337 	    const U8 ch = *t++;
338 	    hibit = !NATIVE_IS_INVARIANT(ch);
339 	    if (hibit) {
340 		s = bytes_to_utf8(s, &len);
341 		break;
342 	    }
343 	}
344     }
345     send = s + len;
346     start = s;
347 
348     svp = hv_fetchs(hv, "FINAL", FALSE);
349     if (svp)
350 	final = SvUV(*svp);
351 
352     if (grows) {
353 	/* d needs to be bigger than s, in case e.g. upgrading is required */
354 	Newx(d, len * 3 + UTF8_MAXBYTES, U8);
355 	dend = d + len * 3;
356 	dstart = d;
357     }
358     else {
359 	dstart = d = s;
360 	dend = d + len;
361     }
362 
363     while (s < send) {
364 	const UV uv = swash_fetch(rv, s, TRUE);
365 	if (uv < none) {
366 	    s += UTF8SKIP(s);
367 	    matches++;
368 	    d = uvuni_to_utf8(d, uv);
369 	}
370 	else if (uv == none) {
371 	    const int i = UTF8SKIP(s);
372 	    Move(s, d, i, U8);
373 	    d += i;
374 	    s += i;
375 	}
376 	else if (uv == extra) {
377 	    s += UTF8SKIP(s);
378 	    matches++;
379 	    d = uvuni_to_utf8(d, final);
380 	}
381 	else
382 	    s += UTF8SKIP(s);
383 
384 	if (d > dend) {
385 	    const STRLEN clen = d - dstart;
386 	    const STRLEN nlen = dend - dstart + len + UTF8_MAXBYTES;
387 	    if (!grows)
388 		Perl_croak(aTHX_ "panic: do_trans_simple_utf8 line %d",__LINE__);
389 	    Renew(dstart, nlen + UTF8_MAXBYTES, U8);
390 	    d = dstart + clen;
391 	    dend = dstart + nlen;
392 	}
393     }
394     if (grows || hibit) {
395 	sv_setpvn(sv, (char*)dstart, d - dstart);
396 	Safefree(dstart);
397 	if (grows && hibit)
398 	    Safefree(start);
399     }
400     else {
401 	*d = '\0';
402 	SvCUR_set(sv, d - dstart);
403     }
404     SvSETMAGIC(sv);
405     SvUTF8_on(sv);
406 
407     return matches;
408 }
409 
410 STATIC I32
411 S_do_trans_count_utf8(pTHX_ SV * const sv)
412 {
413     dVAR;
414     const U8 *s;
415     const U8 *start = NULL;
416     const U8 *send;
417     I32 matches = 0;
418     STRLEN len;
419     SV* const  rv =
420 #ifdef USE_ITHREADS
421 		    PAD_SVl(cPADOP->op_padix);
422 #else
423 		    MUTABLE_SV(cSVOP->op_sv);
424 #endif
425     HV* const hv = MUTABLE_HV(SvRV(rv));
426     SV* const * const svp = hv_fetchs(hv, "NONE", FALSE);
427     const UV none = svp ? SvUV(*svp) : 0x7fffffff;
428     const UV extra = none + 1;
429     U8 hibit = 0;
430 
431     PERL_ARGS_ASSERT_DO_TRANS_COUNT_UTF8;
432 
433     s = (const U8*)SvPV_const(sv, len);
434     if (!SvUTF8(sv)) {
435 	const U8 *t = s;
436 	const U8 * const e = s + len;
437 	while (t < e) {
438 	    const U8 ch = *t++;
439 	    hibit = !NATIVE_IS_INVARIANT(ch);
440 	    if (hibit) {
441 		start = s = bytes_to_utf8(s, &len);
442 		break;
443 	    }
444 	}
445     }
446     send = s + len;
447 
448     while (s < send) {
449 	const UV uv = swash_fetch(rv, s, TRUE);
450 	if (uv < none || uv == extra)
451 	    matches++;
452 	s += UTF8SKIP(s);
453     }
454     if (hibit)
455         Safefree(start);
456 
457     return matches;
458 }
459 
460 STATIC I32
461 S_do_trans_complex_utf8(pTHX_ SV * const sv)
462 {
463     dVAR;
464     U8 *start, *send;
465     U8 *d;
466     I32 matches = 0;
467     const I32 squash   = PL_op->op_private & OPpTRANS_SQUASH;
468     const I32 del      = PL_op->op_private & OPpTRANS_DELETE;
469     const I32 grows    = PL_op->op_private & OPpTRANS_GROWS;
470     SV* const  rv =
471 #ifdef USE_ITHREADS
472 		    PAD_SVl(cPADOP->op_padix);
473 #else
474 		    MUTABLE_SV(cSVOP->op_sv);
475 #endif
476     HV * const hv = MUTABLE_HV(SvRV(rv));
477     SV * const *svp = hv_fetchs(hv, "NONE", FALSE);
478     const UV none = svp ? SvUV(*svp) : 0x7fffffff;
479     const UV extra = none + 1;
480     UV final = 0;
481     bool havefinal = FALSE;
482     STRLEN len;
483     U8 *dstart, *dend;
484     U8 hibit = 0;
485     U8 *s = (U8*)SvPV(sv, len);
486 
487     PERL_ARGS_ASSERT_DO_TRANS_COMPLEX_UTF8;
488 
489     if (!SvUTF8(sv)) {
490 	const U8 *t = s;
491 	const U8 * const e = s + len;
492 	while (t < e) {
493 	    const U8 ch = *t++;
494 	    hibit = !NATIVE_IS_INVARIANT(ch);
495 	    if (hibit) {
496 		s = bytes_to_utf8(s, &len);
497 		break;
498 	    }
499 	}
500     }
501     send = s + len;
502     start = s;
503 
504     svp = hv_fetchs(hv, "FINAL", FALSE);
505     if (svp) {
506 	final = SvUV(*svp);
507 	havefinal = TRUE;
508     }
509 
510     if (grows) {
511 	/* d needs to be bigger than s, in case e.g. upgrading is required */
512 	Newx(d, len * 3 + UTF8_MAXBYTES, U8);
513 	dend = d + len * 3;
514 	dstart = d;
515     }
516     else {
517 	dstart = d = s;
518 	dend = d + len;
519     }
520 
521     if (squash) {
522 	UV puv = 0xfeedface;
523 	while (s < send) {
524 	    UV uv = swash_fetch(rv, s, TRUE);
525 
526 	    if (d > dend) {
527 		const STRLEN clen = d - dstart;
528 		const STRLEN nlen = dend - dstart + len + UTF8_MAXBYTES;
529 		if (!grows)
530 		    Perl_croak(aTHX_ "panic: do_trans_complex_utf8 line %d",__LINE__);
531 		Renew(dstart, nlen + UTF8_MAXBYTES, U8);
532 		d = dstart + clen;
533 		dend = dstart + nlen;
534 	    }
535 	    if (uv < none) {
536 		matches++;
537 		s += UTF8SKIP(s);
538 		if (uv != puv) {
539 		    d = uvuni_to_utf8(d, uv);
540 		    puv = uv;
541 		}
542 		continue;
543 	    }
544 	    else if (uv == none) {	/* "none" is unmapped character */
545 		const int i = UTF8SKIP(s);
546 		Move(s, d, i, U8);
547 		d += i;
548 		s += i;
549 		puv = 0xfeedface;
550 		continue;
551 	    }
552 	    else if (uv == extra && !del) {
553 		matches++;
554 		if (havefinal) {
555 		    s += UTF8SKIP(s);
556 		    if (puv != final) {
557 			d = uvuni_to_utf8(d, final);
558 			puv = final;
559 		    }
560 		}
561 		else {
562 		    STRLEN len;
563 		    uv = utf8n_to_uvuni(s, send - s, &len, UTF8_ALLOW_DEFAULT);
564 		    if (uv != puv) {
565 			Move(s, d, len, U8);
566 			d += len;
567 			puv = uv;
568 		    }
569 		    s += len;
570 		}
571 		continue;
572 	    }
573 	    matches++;			/* "none+1" is delete character */
574 	    s += UTF8SKIP(s);
575 	}
576     }
577     else {
578 	while (s < send) {
579 	    const UV uv = swash_fetch(rv, s, TRUE);
580 	    if (d > dend) {
581 	        const STRLEN clen = d - dstart;
582 		const STRLEN nlen = dend - dstart + len + UTF8_MAXBYTES;
583 		if (!grows)
584 		    Perl_croak(aTHX_ "panic: do_trans_complex_utf8 line %d",__LINE__);
585 		Renew(dstart, nlen + UTF8_MAXBYTES, U8);
586 		d = dstart + clen;
587 		dend = dstart + nlen;
588 	    }
589 	    if (uv < none) {
590 		matches++;
591 		s += UTF8SKIP(s);
592 		d = uvuni_to_utf8(d, uv);
593 		continue;
594 	    }
595 	    else if (uv == none) {	/* "none" is unmapped character */
596 		const int i = UTF8SKIP(s);
597 		Move(s, d, i, U8);
598 		d += i;
599 		s += i;
600 		continue;
601 	    }
602 	    else if (uv == extra && !del) {
603 		matches++;
604 		s += UTF8SKIP(s);
605 		d = uvuni_to_utf8(d, final);
606 		continue;
607 	    }
608 	    matches++;			/* "none+1" is delete character */
609 	    s += UTF8SKIP(s);
610 	}
611     }
612     if (grows || hibit) {
613 	sv_setpvn(sv, (char*)dstart, d - dstart);
614 	Safefree(dstart);
615 	if (grows && hibit)
616 	    Safefree(start);
617     }
618     else {
619 	*d = '\0';
620 	SvCUR_set(sv, d - dstart);
621     }
622     SvUTF8_on(sv);
623     SvSETMAGIC(sv);
624 
625     return matches;
626 }
627 
628 I32
629 Perl_do_trans(pTHX_ SV *sv)
630 {
631     dVAR;
632     STRLEN len;
633     const I32 hasutf = (PL_op->op_private &
634                     (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF));
635 
636     PERL_ARGS_ASSERT_DO_TRANS;
637 
638     if (SvREADONLY(sv) && !(PL_op->op_private & OPpTRANS_IDENTICAL)) {
639         if (SvIsCOW(sv))
640             sv_force_normal_flags(sv, 0);
641         if (SvREADONLY(sv))
642             Perl_croak(aTHX_ "%s", PL_no_modify);
643     }
644     (void)SvPV_const(sv, len);
645     if (!len)
646 	return 0;
647     if (!(PL_op->op_private & OPpTRANS_IDENTICAL)) {
648 	if (!SvPOKp(sv))
649 	    (void)SvPV_force(sv, len);
650 	(void)SvPOK_only_UTF8(sv);
651     }
652 
653     DEBUG_t( Perl_deb(aTHX_ "2.TBL\n"));
654 
655     switch (PL_op->op_private & ~hasutf & (
656 		OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF|OPpTRANS_IDENTICAL|
657 		OPpTRANS_SQUASH|OPpTRANS_DELETE|OPpTRANS_COMPLEMENT)) {
658     case 0:
659 	if (hasutf)
660 	    return do_trans_simple_utf8(sv);
661 	else
662 	    return do_trans_simple(sv);
663 
664     case OPpTRANS_IDENTICAL:
665     case OPpTRANS_IDENTICAL|OPpTRANS_COMPLEMENT:
666 	if (hasutf)
667 	    return do_trans_count_utf8(sv);
668 	else
669 	    return do_trans_count(sv);
670 
671     default:
672 	if (hasutf)
673 	    return do_trans_complex_utf8(sv);
674 	else
675 	    return do_trans_complex(sv);
676     }
677 }
678 
679 void
680 Perl_do_join(pTHX_ register SV *sv, SV *delim, register SV **mark, register SV **sp)
681 {
682     dVAR;
683     SV ** const oldmark = mark;
684     register I32 items = sp - mark;
685     register STRLEN len;
686     STRLEN delimlen;
687 
688     PERL_ARGS_ASSERT_DO_JOIN;
689 
690     (void) SvPV_const(delim, delimlen); /* stringify and get the delimlen */
691     /* SvCUR assumes it's SvPOK() and woe betide you if it's not. */
692 
693     mark++;
694     len = (items > 0 ? (delimlen * (items - 1) ) : 0);
695     SvUPGRADE(sv, SVt_PV);
696     if (SvLEN(sv) < len + items) {	/* current length is way too short */
697 	while (items-- > 0) {
698 	    if (*mark && !SvGAMAGIC(*mark) && SvOK(*mark)) {
699 		STRLEN tmplen;
700 		SvPV_const(*mark, tmplen);
701 		len += tmplen;
702 	    }
703 	    mark++;
704 	}
705 	SvGROW(sv, len + 1);		/* so try to pre-extend */
706 
707 	mark = oldmark;
708 	items = sp - mark;
709 	++mark;
710     }
711 
712     sv_setpvs(sv, "");
713     /* sv_setpv retains old UTF8ness [perl #24846] */
714     SvUTF8_off(sv);
715 
716     if (PL_tainting && SvMAGICAL(sv))
717 	SvTAINTED_off(sv);
718 
719     if (items-- > 0) {
720 	if (*mark)
721 	    sv_catsv(sv, *mark);
722 	mark++;
723     }
724 
725     if (delimlen) {
726 	for (; items > 0; items--,mark++) {
727 	    sv_catsv(sv,delim);
728 	    sv_catsv(sv,*mark);
729 	}
730     }
731     else {
732 	for (; items > 0; items--,mark++)
733 	    sv_catsv(sv,*mark);
734     }
735     SvSETMAGIC(sv);
736 }
737 
738 void
739 Perl_do_sprintf(pTHX_ SV *sv, I32 len, SV **sarg)
740 {
741     dVAR;
742     STRLEN patlen;
743     const char * const pat = SvPV_const(*sarg, patlen);
744     bool do_taint = FALSE;
745 
746     PERL_ARGS_ASSERT_DO_SPRINTF;
747 
748     SvUTF8_off(sv);
749     if (DO_UTF8(*sarg))
750         SvUTF8_on(sv);
751     sv_vsetpvfn(sv, pat, patlen, NULL, sarg + 1, len - 1, &do_taint);
752     SvSETMAGIC(sv);
753     if (do_taint)
754 	SvTAINTED_on(sv);
755 }
756 
757 /* currently converts input to bytes if possible, but doesn't sweat failure */
758 UV
759 Perl_do_vecget(pTHX_ SV *sv, I32 offset, I32 size)
760 {
761     dVAR;
762     STRLEN srclen, len, uoffset, bitoffs = 0;
763     const unsigned char *s = (const unsigned char *) SvPV_const(sv, srclen);
764     UV retnum = 0;
765 
766     PERL_ARGS_ASSERT_DO_VECGET;
767 
768     if (offset < 0)
769 	return 0;
770     if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */
771 	Perl_croak(aTHX_ "Illegal number of bits in vec");
772 
773     if (SvUTF8(sv))
774 	(void) Perl_sv_utf8_downgrade(aTHX_ sv, TRUE);
775 
776     if (size < 8) {
777 	bitoffs = ((offset%8)*size)%8;
778 	uoffset = offset/(8/size);
779     }
780     else if (size > 8)
781 	uoffset = offset*(size/8);
782     else
783 	uoffset = offset;
784 
785     len = uoffset + (bitoffs + size + 7)/8;	/* required number of bytes */
786     if (len > srclen) {
787 	if (size <= 8)
788 	    retnum = 0;
789 	else {
790 	    if (size == 16) {
791 		if (uoffset >= srclen)
792 		    retnum = 0;
793 		else
794 		    retnum = (UV) s[uoffset] <<  8;
795 	    }
796 	    else if (size == 32) {
797 		if (uoffset >= srclen)
798 		    retnum = 0;
799 		else if (uoffset + 1 >= srclen)
800 		    retnum =
801 			((UV) s[uoffset    ] << 24);
802 		else if (uoffset + 2 >= srclen)
803 		    retnum =
804 			((UV) s[uoffset    ] << 24) +
805 			((UV) s[uoffset + 1] << 16);
806 		else
807 		    retnum =
808 			((UV) s[uoffset    ] << 24) +
809 			((UV) s[uoffset + 1] << 16) +
810 			(     s[uoffset + 2] <<  8);
811 	    }
812 #ifdef UV_IS_QUAD
813 	    else if (size == 64) {
814 		if (ckWARN(WARN_PORTABLE))
815 		    Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
816 				"Bit vector size > 32 non-portable");
817 		if (uoffset >= srclen)
818 		    retnum = 0;
819 		else if (uoffset + 1 >= srclen)
820 		    retnum =
821 			(UV) s[uoffset     ] << 56;
822 		else if (uoffset + 2 >= srclen)
823 		    retnum =
824 			((UV) s[uoffset    ] << 56) +
825 			((UV) s[uoffset + 1] << 48);
826 		else if (uoffset + 3 >= srclen)
827 		    retnum =
828 			((UV) s[uoffset    ] << 56) +
829 			((UV) s[uoffset + 1] << 48) +
830 			((UV) s[uoffset + 2] << 40);
831 		else if (uoffset + 4 >= srclen)
832 		    retnum =
833 			((UV) s[uoffset    ] << 56) +
834 			((UV) s[uoffset + 1] << 48) +
835 			((UV) s[uoffset + 2] << 40) +
836 			((UV) s[uoffset + 3] << 32);
837 		else if (uoffset + 5 >= srclen)
838 		    retnum =
839 			((UV) s[uoffset    ] << 56) +
840 			((UV) s[uoffset + 1] << 48) +
841 			((UV) s[uoffset + 2] << 40) +
842 			((UV) s[uoffset + 3] << 32) +
843 			(     s[uoffset + 4] << 24);
844 		else if (uoffset + 6 >= srclen)
845 		    retnum =
846 			((UV) s[uoffset    ] << 56) +
847 			((UV) s[uoffset + 1] << 48) +
848 			((UV) s[uoffset + 2] << 40) +
849 			((UV) s[uoffset + 3] << 32) +
850 			((UV) s[uoffset + 4] << 24) +
851 			((UV) s[uoffset + 5] << 16);
852 		else
853 		    retnum =
854 			((UV) s[uoffset    ] << 56) +
855 			((UV) s[uoffset + 1] << 48) +
856 			((UV) s[uoffset + 2] << 40) +
857 			((UV) s[uoffset + 3] << 32) +
858 			((UV) s[uoffset + 4] << 24) +
859 			((UV) s[uoffset + 5] << 16) +
860 			(     s[uoffset + 6] <<  8);
861 	    }
862 #endif
863 	}
864     }
865     else if (size < 8)
866 	retnum = (s[uoffset] >> bitoffs) & ((1 << size) - 1);
867     else {
868 	if (size == 8)
869 	    retnum = s[uoffset];
870 	else if (size == 16)
871 	    retnum =
872 		((UV) s[uoffset] <<      8) +
873 		      s[uoffset + 1];
874 	else if (size == 32)
875 	    retnum =
876 		((UV) s[uoffset    ] << 24) +
877 		((UV) s[uoffset + 1] << 16) +
878 		(     s[uoffset + 2] <<  8) +
879 		      s[uoffset + 3];
880 #ifdef UV_IS_QUAD
881 	else if (size == 64) {
882 	    if (ckWARN(WARN_PORTABLE))
883 		Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
884 			    "Bit vector size > 32 non-portable");
885 	    retnum =
886 		((UV) s[uoffset    ] << 56) +
887 		((UV) s[uoffset + 1] << 48) +
888 		((UV) s[uoffset + 2] << 40) +
889 		((UV) s[uoffset + 3] << 32) +
890 		((UV) s[uoffset + 4] << 24) +
891 		((UV) s[uoffset + 5] << 16) +
892 		(     s[uoffset + 6] <<  8) +
893 		      s[uoffset + 7];
894 	}
895 #endif
896     }
897 
898     return retnum;
899 }
900 
901 /* currently converts input to bytes if possible but doesn't sweat failures,
902  * although it does ensure that the string it clobbers is not marked as
903  * utf8-valid any more
904  */
905 void
906 Perl_do_vecset(pTHX_ SV *sv)
907 {
908     dVAR;
909     register I32 offset, bitoffs = 0;
910     register I32 size;
911     register unsigned char *s;
912     register UV lval;
913     I32 mask;
914     STRLEN targlen;
915     STRLEN len;
916     SV * const targ = LvTARG(sv);
917 
918     PERL_ARGS_ASSERT_DO_VECSET;
919 
920     if (!targ)
921 	return;
922     s = (unsigned char*)SvPV_force(targ, targlen);
923     if (SvUTF8(targ)) {
924 	/* This is handled by the SvPOK_only below...
925 	if (!Perl_sv_utf8_downgrade(aTHX_ targ, TRUE))
926 	    SvUTF8_off(targ);
927 	 */
928 	(void) Perl_sv_utf8_downgrade(aTHX_ targ, TRUE);
929     }
930 
931     (void)SvPOK_only(targ);
932     lval = SvUV(sv);
933     offset = LvTARGOFF(sv);
934     if (offset < 0)
935 	Perl_croak(aTHX_ "Negative offset to vec in lvalue context");
936     size = LvTARGLEN(sv);
937     if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */
938 	Perl_croak(aTHX_ "Illegal number of bits in vec");
939 
940     if (size < 8) {
941 	bitoffs = ((offset%8)*size)%8;
942 	offset /= 8/size;
943     }
944     else if (size > 8)
945 	offset *= size/8;
946 
947     len = offset + (bitoffs + size + 7)/8;	/* required number of bytes */
948     if (len > targlen) {
949 	s = (unsigned char*)SvGROW(targ, len + 1);
950 	(void)memzero((char *)(s + targlen), len - targlen + 1);
951 	SvCUR_set(targ, len);
952     }
953 
954     if (size < 8) {
955 	mask = (1 << size) - 1;
956 	lval &= mask;
957 	s[offset] &= ~(mask << bitoffs);
958 	s[offset] |= lval << bitoffs;
959     }
960     else {
961 	if (size == 8)
962 	    s[offset  ] = (U8)( lval        & 0xff);
963 	else if (size == 16) {
964 	    s[offset  ] = (U8)((lval >>  8) & 0xff);
965 	    s[offset+1] = (U8)( lval        & 0xff);
966 	}
967 	else if (size == 32) {
968 	    s[offset  ] = (U8)((lval >> 24) & 0xff);
969 	    s[offset+1] = (U8)((lval >> 16) & 0xff);
970 	    s[offset+2] = (U8)((lval >>  8) & 0xff);
971 	    s[offset+3] = (U8)( lval        & 0xff);
972 	}
973 #ifdef UV_IS_QUAD
974 	else if (size == 64) {
975 	    if (ckWARN(WARN_PORTABLE))
976 		Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
977 			    "Bit vector size > 32 non-portable");
978 	    s[offset  ] = (U8)((lval >> 56) & 0xff);
979 	    s[offset+1] = (U8)((lval >> 48) & 0xff);
980 	    s[offset+2] = (U8)((lval >> 40) & 0xff);
981 	    s[offset+3] = (U8)((lval >> 32) & 0xff);
982 	    s[offset+4] = (U8)((lval >> 24) & 0xff);
983 	    s[offset+5] = (U8)((lval >> 16) & 0xff);
984 	    s[offset+6] = (U8)((lval >>  8) & 0xff);
985 	    s[offset+7] = (U8)( lval        & 0xff);
986 	}
987 #endif
988     }
989     SvSETMAGIC(targ);
990 }
991 
992 void
993 Perl_do_chop(pTHX_ register SV *astr, register SV *sv)
994 {
995     dVAR;
996     STRLEN len;
997     char *s;
998 
999     PERL_ARGS_ASSERT_DO_CHOP;
1000 
1001     if (SvTYPE(sv) == SVt_PVAV) {
1002 	register I32 i;
1003 	AV *const av = MUTABLE_AV(sv);
1004 	const I32 max = AvFILL(av);
1005 
1006 	for (i = 0; i <= max; i++) {
1007 	    sv = MUTABLE_SV(av_fetch(av, i, FALSE));
1008 	    if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef))
1009 		do_chop(astr, sv);
1010 	}
1011         return;
1012     }
1013     else if (SvTYPE(sv) == SVt_PVHV) {
1014 	HV* const hv = MUTABLE_HV(sv);
1015 	HE* entry;
1016         (void)hv_iterinit(hv);
1017         while ((entry = hv_iternext(hv)))
1018             do_chop(astr,hv_iterval(hv,entry));
1019         return;
1020     }
1021     else if (SvREADONLY(sv)) {
1022         if (SvFAKE(sv)) {
1023             /* SV is copy-on-write */
1024 	    sv_force_normal_flags(sv, 0);
1025         }
1026         if (SvREADONLY(sv))
1027             Perl_croak(aTHX_ "%s", PL_no_modify);
1028     }
1029 
1030     if (PL_encoding && !SvUTF8(sv)) {
1031 	/* like in do_chomp(), utf8-ize the sv as a side-effect
1032 	 * if we're using encoding. */
1033 	sv_recode_to_utf8(sv, PL_encoding);
1034     }
1035 
1036     s = SvPV(sv, len);
1037     if (len && !SvPOK(sv))
1038 	s = SvPV_force_nomg(sv, len);
1039     if (DO_UTF8(sv)) {
1040 	if (s && len) {
1041 	    char * const send = s + len;
1042 	    char * const start = s;
1043 	    s = send - 1;
1044 	    while (s > start && UTF8_IS_CONTINUATION(*s))
1045 		s--;
1046 	    if (is_utf8_string((U8*)s, send - s)) {
1047 		sv_setpvn(astr, s, send - s);
1048 		*s = '\0';
1049 		SvCUR_set(sv, s - start);
1050 		SvNIOK_off(sv);
1051 		SvUTF8_on(astr);
1052 	    }
1053 	}
1054 	else
1055 	    sv_setpvs(astr, "");
1056     }
1057     else if (s && len) {
1058 	s += --len;
1059 	sv_setpvn(astr, s, 1);
1060 	*s = '\0';
1061 	SvCUR_set(sv, len);
1062 	SvUTF8_off(sv);
1063 	SvNIOK_off(sv);
1064     }
1065     else
1066 	sv_setpvs(astr, "");
1067     SvSETMAGIC(sv);
1068 }
1069 
1070 I32
1071 Perl_do_chomp(pTHX_ register SV *sv)
1072 {
1073     dVAR;
1074     register I32 count;
1075     STRLEN len;
1076     char *s;
1077     char *temp_buffer = NULL;
1078     SV* svrecode = NULL;
1079 
1080     PERL_ARGS_ASSERT_DO_CHOMP;
1081 
1082     if (RsSNARF(PL_rs))
1083 	return 0;
1084     if (RsRECORD(PL_rs))
1085       return 0;
1086     count = 0;
1087     if (SvTYPE(sv) == SVt_PVAV) {
1088 	register I32 i;
1089 	AV *const av = MUTABLE_AV(sv);
1090 	const I32 max = AvFILL(av);
1091 
1092 	for (i = 0; i <= max; i++) {
1093 	    sv = MUTABLE_SV(av_fetch(av, i, FALSE));
1094 	    if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef))
1095 		count += do_chomp(sv);
1096 	}
1097         return count;
1098     }
1099     else if (SvTYPE(sv) == SVt_PVHV) {
1100 	HV* const hv = MUTABLE_HV(sv);
1101 	HE* entry;
1102         (void)hv_iterinit(hv);
1103         while ((entry = hv_iternext(hv)))
1104             count += do_chomp(hv_iterval(hv,entry));
1105         return count;
1106     }
1107     else if (SvREADONLY(sv)) {
1108         if (SvFAKE(sv)) {
1109             /* SV is copy-on-write */
1110 	    sv_force_normal_flags(sv, 0);
1111         }
1112         if (SvREADONLY(sv))
1113             Perl_croak(aTHX_ "%s", PL_no_modify);
1114     }
1115 
1116     if (PL_encoding) {
1117 	if (!SvUTF8(sv)) {
1118 	/* XXX, here sv is utf8-ized as a side-effect!
1119 	   If encoding.pm is used properly, almost string-generating
1120 	   operations, including literal strings, chr(), input data, etc.
1121 	   should have been utf8-ized already, right?
1122 	*/
1123 	    sv_recode_to_utf8(sv, PL_encoding);
1124 	}
1125     }
1126 
1127     s = SvPV(sv, len);
1128     if (s && len) {
1129 	s += --len;
1130 	if (RsPARA(PL_rs)) {
1131 	    if (*s != '\n')
1132 		goto nope;
1133 	    ++count;
1134 	    while (len && s[-1] == '\n') {
1135 		--len;
1136 		--s;
1137 		++count;
1138 	    }
1139 	}
1140 	else {
1141 	    STRLEN rslen, rs_charlen;
1142 	    const char *rsptr = SvPV_const(PL_rs, rslen);
1143 
1144 	    rs_charlen = SvUTF8(PL_rs)
1145 		? sv_len_utf8(PL_rs)
1146 		: rslen;
1147 
1148 	    if (SvUTF8(PL_rs) != SvUTF8(sv)) {
1149 		/* Assumption is that rs is shorter than the scalar.  */
1150 		if (SvUTF8(PL_rs)) {
1151 		    /* RS is utf8, scalar is 8 bit.  */
1152 		    bool is_utf8 = TRUE;
1153 		    temp_buffer = (char*)bytes_from_utf8((U8*)rsptr,
1154 							 &rslen, &is_utf8);
1155 		    if (is_utf8) {
1156 			/* Cannot downgrade, therefore cannot possibly match
1157 			 */
1158 			assert (temp_buffer == rsptr);
1159 			temp_buffer = NULL;
1160 			goto nope;
1161 		    }
1162 		    rsptr = temp_buffer;
1163 		}
1164 		else if (PL_encoding) {
1165 		    /* RS is 8 bit, encoding.pm is used.
1166 		     * Do not recode PL_rs as a side-effect. */
1167 		   svrecode = newSVpvn(rsptr, rslen);
1168 		   sv_recode_to_utf8(svrecode, PL_encoding);
1169 		   rsptr = SvPV_const(svrecode, rslen);
1170 		   rs_charlen = sv_len_utf8(svrecode);
1171 		}
1172 		else {
1173 		    /* RS is 8 bit, scalar is utf8.  */
1174 		    temp_buffer = (char*)bytes_to_utf8((U8*)rsptr, &rslen);
1175 		    rsptr = temp_buffer;
1176 		}
1177 	    }
1178 	    if (rslen == 1) {
1179 		if (*s != *rsptr)
1180 		    goto nope;
1181 		++count;
1182 	    }
1183 	    else {
1184 		if (len < rslen - 1)
1185 		    goto nope;
1186 		len -= rslen - 1;
1187 		s -= rslen - 1;
1188 		if (memNE(s, rsptr, rslen))
1189 		    goto nope;
1190 		count += rs_charlen;
1191 	    }
1192 	}
1193 	s = SvPV_force_nolen(sv);
1194 	SvCUR_set(sv, len);
1195 	*SvEND(sv) = '\0';
1196 	SvNIOK_off(sv);
1197 	SvSETMAGIC(sv);
1198     }
1199   nope:
1200 
1201     if (svrecode)
1202 	 SvREFCNT_dec(svrecode);
1203 
1204     Safefree(temp_buffer);
1205     return count;
1206 }
1207 
1208 void
1209 Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
1210 {
1211     dVAR;
1212 #ifdef LIBERAL
1213     register long *dl;
1214     register long *ll;
1215     register long *rl;
1216 #endif
1217     register char *dc;
1218     STRLEN leftlen;
1219     STRLEN rightlen;
1220     register const char *lc;
1221     register const char *rc;
1222     register STRLEN len;
1223     STRLEN lensave;
1224     const char *lsave;
1225     const char *rsave;
1226     bool left_utf;
1227     bool right_utf;
1228     STRLEN needlen = 0;
1229 
1230     PERL_ARGS_ASSERT_DO_VOP;
1231 
1232     if (sv != left || (optype != OP_BIT_AND && !SvOK(sv) && !SvGMAGICAL(sv)))
1233 	sv_setpvs(sv, "");	/* avoid undef warning on |= and ^= */
1234     if (sv == left) {
1235 	lsave = lc = SvPV_force_nomg(left, leftlen);
1236     }
1237     else {
1238 	lsave = lc = SvPV_nomg_const(left, leftlen);
1239 	SvPV_force_nomg_nolen(sv);
1240     }
1241     rsave = rc = SvPV_nomg_const(right, rightlen);
1242 
1243     /* This need to come after SvPV to ensure that string overloading has
1244        fired off.  */
1245 
1246     left_utf = DO_UTF8(left);
1247     right_utf = DO_UTF8(right);
1248 
1249     if (left_utf && !right_utf) {
1250 	/* Avoid triggering overloading again by using temporaries.
1251 	   Maybe there should be a variant of sv_utf8_upgrade that takes pvn
1252 	*/
1253 	right = newSVpvn_flags(rsave, rightlen, SVs_TEMP);
1254 	sv_utf8_upgrade(right);
1255 	rsave = rc = SvPV_nomg_const(right, rightlen);
1256 	right_utf = TRUE;
1257     }
1258     else if (!left_utf && right_utf) {
1259 	left = newSVpvn_flags(lsave, leftlen, SVs_TEMP);
1260 	sv_utf8_upgrade(left);
1261 	lsave = lc = SvPV_nomg_const(left, leftlen);
1262 	left_utf = TRUE;
1263     }
1264 
1265     len = leftlen < rightlen ? leftlen : rightlen;
1266     lensave = len;
1267     SvCUR_set(sv, len);
1268     (void)SvPOK_only(sv);
1269     if ((left_utf || right_utf) && (sv == left || sv == right)) {
1270 	needlen = optype == OP_BIT_AND ? len : leftlen + rightlen;
1271 	Newxz(dc, needlen + 1, char);
1272     }
1273     else if (SvOK(sv) || SvTYPE(sv) > SVt_PVMG) {
1274 	dc = SvPV_force_nomg_nolen(sv);
1275 	if (SvLEN(sv) < len + 1) {
1276 	    dc = SvGROW(sv, len + 1);
1277 	    (void)memzero(dc + SvCUR(sv), len - SvCUR(sv) + 1);
1278 	}
1279 	if (optype != OP_BIT_AND && (left_utf || right_utf))
1280 	    dc = SvGROW(sv, leftlen + rightlen + 1);
1281     }
1282     else {
1283 	needlen = optype == OP_BIT_AND
1284 		    ? len : (leftlen > rightlen ? leftlen : rightlen);
1285 	Newxz(dc, needlen + 1, char);
1286 	sv_usepvn_flags(sv, dc, needlen, SV_HAS_TRAILING_NUL);
1287 	dc = SvPVX(sv);		/* sv_usepvn() calls Renew() */
1288     }
1289     if (left_utf || right_utf) {
1290 	UV duc, luc, ruc;
1291 	char *dcorig = dc;
1292 	char *dcsave = NULL;
1293 	STRLEN lulen = leftlen;
1294 	STRLEN rulen = rightlen;
1295 	STRLEN ulen;
1296 
1297 	switch (optype) {
1298 	case OP_BIT_AND:
1299 	    while (lulen && rulen) {
1300 		luc = utf8n_to_uvchr((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV);
1301 		lc += ulen;
1302 		lulen -= ulen;
1303 		ruc = utf8n_to_uvchr((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV);
1304 		rc += ulen;
1305 		rulen -= ulen;
1306 		duc = luc & ruc;
1307 		dc = (char*)uvchr_to_utf8((U8*)dc, duc);
1308 	    }
1309 	    if (sv == left || sv == right)
1310 		(void)sv_usepvn(sv, dcorig, needlen);
1311 	    SvCUR_set(sv, dc - dcorig);
1312 	    break;
1313 	case OP_BIT_XOR:
1314 	    while (lulen && rulen) {
1315 		luc = utf8n_to_uvchr((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV);
1316 		lc += ulen;
1317 		lulen -= ulen;
1318 		ruc = utf8n_to_uvchr((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV);
1319 		rc += ulen;
1320 		rulen -= ulen;
1321 		duc = luc ^ ruc;
1322 		dc = (char*)uvchr_to_utf8((U8*)dc, duc);
1323 	    }
1324 	    goto mop_up_utf;
1325 	case OP_BIT_OR:
1326 	    while (lulen && rulen) {
1327 		luc = utf8n_to_uvchr((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV);
1328 		lc += ulen;
1329 		lulen -= ulen;
1330 		ruc = utf8n_to_uvchr((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV);
1331 		rc += ulen;
1332 		rulen -= ulen;
1333 		duc = luc | ruc;
1334 		dc = (char*)uvchr_to_utf8((U8*)dc, duc);
1335 	    }
1336 	  mop_up_utf:
1337 	    if (rulen)
1338 		dcsave = savepvn(rc, rulen);
1339 	    else if (lulen)
1340 		dcsave = savepvn(lc, lulen);
1341 	    if (sv == left || sv == right)
1342 		(void)sv_usepvn(sv, dcorig, needlen); /* Uses Renew(). */
1343 	    SvCUR_set(sv, dc - dcorig);
1344 	    if (rulen)
1345 		sv_catpvn(sv, dcsave, rulen);
1346 	    else if (lulen)
1347 		sv_catpvn(sv, dcsave, lulen);
1348 	    else
1349 		*SvEND(sv) = '\0';
1350 	    Safefree(dcsave);
1351 	    break;
1352 	default:
1353 	    if (sv == left || sv == right)
1354 		Safefree(dcorig);
1355 	    Perl_croak(aTHX_ "panic: do_vop called for op %u (%s)",
1356 			(unsigned)optype, PL_op_name[optype]);
1357 	}
1358 	SvUTF8_on(sv);
1359 	goto finish;
1360     }
1361     else
1362 #ifdef LIBERAL
1363     if (len >= sizeof(long)*4 &&
1364 	!((unsigned long)dc % sizeof(long)) &&
1365 	!((unsigned long)lc % sizeof(long)) &&
1366 	!((unsigned long)rc % sizeof(long)))	/* It's almost always aligned... */
1367     {
1368 	const STRLEN remainder = len % (sizeof(long)*4);
1369 	len /= (sizeof(long)*4);
1370 
1371 	dl = (long*)dc;
1372 	ll = (long*)lc;
1373 	rl = (long*)rc;
1374 
1375 	switch (optype) {
1376 	case OP_BIT_AND:
1377 	    while (len--) {
1378 		*dl++ = *ll++ & *rl++;
1379 		*dl++ = *ll++ & *rl++;
1380 		*dl++ = *ll++ & *rl++;
1381 		*dl++ = *ll++ & *rl++;
1382 	    }
1383 	    break;
1384 	case OP_BIT_XOR:
1385 	    while (len--) {
1386 		*dl++ = *ll++ ^ *rl++;
1387 		*dl++ = *ll++ ^ *rl++;
1388 		*dl++ = *ll++ ^ *rl++;
1389 		*dl++ = *ll++ ^ *rl++;
1390 	    }
1391 	    break;
1392 	case OP_BIT_OR:
1393 	    while (len--) {
1394 		*dl++ = *ll++ | *rl++;
1395 		*dl++ = *ll++ | *rl++;
1396 		*dl++ = *ll++ | *rl++;
1397 		*dl++ = *ll++ | *rl++;
1398 	    }
1399 	}
1400 
1401 	dc = (char*)dl;
1402 	lc = (char*)ll;
1403 	rc = (char*)rl;
1404 
1405 	len = remainder;
1406     }
1407 #endif
1408     {
1409 	switch (optype) {
1410 	case OP_BIT_AND:
1411 	    while (len--)
1412 		*dc++ = *lc++ & *rc++;
1413 	    *dc = '\0';
1414 	    break;
1415 	case OP_BIT_XOR:
1416 	    while (len--)
1417 		*dc++ = *lc++ ^ *rc++;
1418 	    goto mop_up;
1419 	case OP_BIT_OR:
1420 	    while (len--)
1421 		*dc++ = *lc++ | *rc++;
1422 	  mop_up:
1423 	    len = lensave;
1424 	    if (rightlen > len)
1425 		sv_catpvn(sv, rsave + len, rightlen - len);
1426 	    else if (leftlen > (STRLEN)len)
1427 		sv_catpvn(sv, lsave + len, leftlen - len);
1428 	    else
1429 		*SvEND(sv) = '\0';
1430 	    break;
1431 	}
1432     }
1433 finish:
1434     SvTAINT(sv);
1435 }
1436 
1437 OP *
1438 Perl_do_kv(pTHX)
1439 {
1440     dVAR;
1441     dSP;
1442     HV * const hv = MUTABLE_HV(POPs);
1443     HV *keys;
1444     register HE *entry;
1445     const I32 gimme = GIMME_V;
1446     const I32 dokv =     (PL_op->op_type == OP_RV2HV || PL_op->op_type == OP_PADHV);
1447     const I32 dokeys =   dokv || (PL_op->op_type == OP_KEYS);
1448     const I32 dovalues = dokv || (PL_op->op_type == OP_VALUES);
1449 
1450     if (!hv) {
1451 	if (PL_op->op_flags & OPf_MOD || LVRET) {	/* lvalue */
1452 	    dTARGET;		/* make sure to clear its target here */
1453 	    if (SvTYPE(TARG) == SVt_PVLV)
1454 		LvTARG(TARG) = NULL;
1455 	    PUSHs(TARG);
1456 	}
1457 	RETURN;
1458     }
1459 
1460     keys = hv;
1461     (void)hv_iterinit(keys);	/* always reset iterator regardless */
1462 
1463     if (gimme == G_VOID)
1464 	RETURN;
1465 
1466     if (gimme == G_SCALAR) {
1467 	IV i;
1468 	dTARGET;
1469 
1470 	if (PL_op->op_flags & OPf_MOD || LVRET) {	/* lvalue */
1471 	    if (SvTYPE(TARG) < SVt_PVLV) {
1472 		sv_upgrade(TARG, SVt_PVLV);
1473 		sv_magic(TARG, NULL, PERL_MAGIC_nkeys, NULL, 0);
1474 	    }
1475 	    LvTYPE(TARG) = 'k';
1476 	    if (LvTARG(TARG) != (const SV *)keys) {
1477 		if (LvTARG(TARG))
1478 		    SvREFCNT_dec(LvTARG(TARG));
1479 		LvTARG(TARG) = SvREFCNT_inc_simple(keys);
1480 	    }
1481 	    PUSHs(TARG);
1482 	    RETURN;
1483 	}
1484 
1485 	if (! SvTIED_mg((const SV *)keys, PERL_MAGIC_tied) )
1486 	{
1487 	    i = HvKEYS(keys);
1488 	}
1489 	else {
1490 	    i = 0;
1491 	    while (hv_iternext(keys)) i++;
1492 	}
1493 	PUSHi( i );
1494 	RETURN;
1495     }
1496 
1497     EXTEND(SP, HvKEYS(keys) * (dokeys + dovalues));
1498 
1499     PUTBACK;	/* hv_iternext and hv_iterval might clobber stack_sp */
1500     while ((entry = hv_iternext(keys))) {
1501 	SPAGAIN;
1502 	if (dokeys) {
1503 	    SV* const sv = hv_iterkeysv(entry);
1504 	    XPUSHs(sv);	/* won't clobber stack_sp */
1505 	}
1506 	if (dovalues) {
1507 	    SV *tmpstr;
1508 	    PUTBACK;
1509 	    tmpstr = hv_iterval(hv,entry);
1510 	    DEBUG_H(Perl_sv_setpvf(aTHX_ tmpstr, "%lu%%%d=%lu",
1511 			    (unsigned long)HeHASH(entry),
1512 			    (int)HvMAX(keys)+1,
1513 			    (unsigned long)(HeHASH(entry) & HvMAX(keys))));
1514 	    SPAGAIN;
1515 	    XPUSHs(tmpstr);
1516 	}
1517 	PUTBACK;
1518     }
1519     return NORMAL;
1520 }
1521 
1522 /*
1523  * Local variables:
1524  * c-indentation-style: bsd
1525  * c-basic-offset: 4
1526  * indent-tabs-mode: t
1527  * End:
1528  *
1529  * ex: set ts=8 sts=4 sw=4 noet:
1530  */
1531