xref: /openbsd/gnu/usr.bin/perl/doop.c (revision cecf84d4)
1 /*    doop.c
2  *
3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4  *    2001, 2002, 2004, 2005, 2006, 2007, 2008, 2009 by Larry Wall and others
5  *
6  *    You may distribute under the terms of either the GNU General Public
7  *    License or the Artistic License, as specified in the README file.
8  *
9  */
10 
11 /*
12  *  'So that was the job I felt I had to do when I started,' thought Sam.
13  *
14  *     [p.934 of _The Lord of the Rings_, VI/iii: "Mount Doom"]
15  */
16 
17 /* This file contains some common functions needed to carry out certain
18  * ops. For example, both pp_sprintf() and pp_prtf() call the function
19  * do_printf() found in this file.
20  */
21 
22 #include "EXTERN.h"
23 #define PERL_IN_DOOP_C
24 #include "perl.h"
25 
26 #ifndef PERL_MICRO
27 #include <signal.h>
28 #endif
29 
30 STATIC I32
31 S_do_trans_simple(pTHX_ SV * const sv)
32 {
33     dVAR;
34     I32 matches = 0;
35     STRLEN len;
36     U8 *s = (U8*)SvPV_nomg(sv,len);
37     U8 * const send = s+len;
38     const short * const tbl = (short*)cPVOP->op_pv;
39 
40     PERL_ARGS_ASSERT_DO_TRANS_SIMPLE;
41 
42     if (!tbl)
43 	Perl_croak(aTHX_ "panic: do_trans_simple line %d",__LINE__);
44 
45     /* First, take care of non-UTF-8 input strings, because they're easy */
46     if (!SvUTF8(sv)) {
47 	while (s < send) {
48 	    const I32 ch = tbl[*s];
49 	    if (ch >= 0) {
50 		matches++;
51 		*s = (U8)ch;
52 	    }
53 	    s++;
54 	}
55 	SvSETMAGIC(sv);
56     }
57     else {
58 	const I32 grows = PL_op->op_private & OPpTRANS_GROWS;
59 	U8 *d;
60 	U8 *dstart;
61 
62 	/* Allow for expansion: $_="a".chr(400); tr/a/\xFE/, FE needs encoding */
63 	if (grows)
64 	    Newx(d, len*2+1, U8);
65 	else
66 	    d = s;
67 	dstart = d;
68 	while (s < send) {
69 	    STRLEN ulen;
70 	    I32 ch;
71 
72 	    /* Need to check this, otherwise 128..255 won't match */
73 	    const UV c = utf8n_to_uvchr(s, send - s, &ulen, UTF8_ALLOW_DEFAULT);
74 	    if (c < 0x100 && (ch = tbl[c]) >= 0) {
75 		matches++;
76 		d = uvchr_to_utf8(d, ch);
77 		s += ulen;
78 	    }
79 	    else { /* No match -> copy */
80 		Move(s, d, ulen, U8);
81 		d += ulen;
82 		s += ulen;
83 	    }
84 	}
85 	if (grows) {
86 	    sv_setpvn(sv, (char*)dstart, d - dstart);
87 	    Safefree(dstart);
88 	}
89 	else {
90 	    *d = '\0';
91 	    SvCUR_set(sv, d - dstart);
92 	}
93 	SvUTF8_on(sv);
94 	SvSETMAGIC(sv);
95     }
96     return matches;
97 }
98 
99 STATIC I32
100 S_do_trans_count(pTHX_ SV * const sv)
101 {
102     dVAR;
103     STRLEN len;
104     const U8 *s = (const U8*)SvPV_nomg_const(sv, len);
105     const U8 * const send = s + len;
106     I32 matches = 0;
107     const short * const tbl = (short*)cPVOP->op_pv;
108 
109     PERL_ARGS_ASSERT_DO_TRANS_COUNT;
110 
111     if (!tbl)
112 	Perl_croak(aTHX_ "panic: do_trans_count line %d",__LINE__);
113 
114     if (!SvUTF8(sv)) {
115 	while (s < send) {
116             if (tbl[*s++] >= 0)
117                 matches++;
118 	}
119     }
120     else {
121 	const I32 complement = PL_op->op_private & OPpTRANS_COMPLEMENT;
122 	while (s < send) {
123 	    STRLEN ulen;
124 	    const UV c = utf8n_to_uvchr(s, send - s, &ulen, UTF8_ALLOW_DEFAULT);
125 	    if (c < 0x100) {
126 		if (tbl[c] >= 0)
127 		    matches++;
128 	    } else if (complement)
129 		matches++;
130 	    s += ulen;
131 	}
132     }
133 
134     return matches;
135 }
136 
137 STATIC I32
138 S_do_trans_complex(pTHX_ SV * const sv)
139 {
140     dVAR;
141     STRLEN len;
142     U8 *s = (U8*)SvPV_nomg(sv, len);
143     U8 * const send = s+len;
144     I32 matches = 0;
145     const short * const tbl = (short*)cPVOP->op_pv;
146 
147     PERL_ARGS_ASSERT_DO_TRANS_COMPLEX;
148 
149     if (!tbl)
150 	Perl_croak(aTHX_ "panic: do_trans_complex line %d",__LINE__);
151 
152     if (!SvUTF8(sv)) {
153 	U8 *d = s;
154 	U8 * const dstart = d;
155 
156 	if (PL_op->op_private & OPpTRANS_SQUASH) {
157 	    const U8* p = send;
158 	    while (s < send) {
159 		const I32 ch = tbl[*s];
160 		if (ch >= 0) {
161 		    *d = (U8)ch;
162 		    matches++;
163 		    if (p != d - 1 || *p != *d)
164 			p = d++;
165 		}
166 		else if (ch == -1)	/* -1 is unmapped character */
167 		    *d++ = *s;
168 		else if (ch == -2)	/* -2 is delete character */
169 		    matches++;
170 		s++;
171 	    }
172 	}
173 	else {
174 	    while (s < send) {
175 		const I32 ch = tbl[*s];
176 		if (ch >= 0) {
177 		    matches++;
178 		    *d++ = (U8)ch;
179 		}
180 		else if (ch == -1)	/* -1 is unmapped character */
181 		    *d++ = *s;
182 		else if (ch == -2)      /* -2 is delete character */
183 		    matches++;
184 		s++;
185 	    }
186 	}
187 	*d = '\0';
188 	SvCUR_set(sv, d - dstart);
189     }
190     else { /* is utf8 */
191 	const I32 complement = PL_op->op_private & OPpTRANS_COMPLEMENT;
192 	const I32 grows = PL_op->op_private & OPpTRANS_GROWS;
193 	const I32 del = PL_op->op_private & OPpTRANS_DELETE;
194 	U8 *d;
195 	U8 *dstart;
196 	STRLEN rlen = 0;
197 
198 	if (grows)
199 	    Newx(d, len*2+1, U8);
200 	else
201 	    d = s;
202 	dstart = d;
203 	if (complement && !del)
204 	    rlen = tbl[0x100];
205 
206 	if (PL_op->op_private & OPpTRANS_SQUASH) {
207 	    UV pch = 0xfeedface;
208 	    while (s < send) {
209 		STRLEN len;
210 		const UV comp = utf8n_to_uvchr(s, send - s, &len,
211 					       UTF8_ALLOW_DEFAULT);
212 		I32 ch;
213 
214 		if (comp > 0xff) {
215 		    if (!complement) {
216 			Move(s, d, len, U8);
217 			d += len;
218 		    }
219 		    else {
220 			matches++;
221 			if (!del) {
222 			    ch = (rlen == 0) ? (I32)comp :
223 				(comp - 0x100 < rlen) ?
224 				tbl[comp+1] : tbl[0x100+rlen];
225 			    if ((UV)ch != pch) {
226 				d = uvchr_to_utf8(d, ch);
227 				pch = (UV)ch;
228 			    }
229 			    s += len;
230 			    continue;
231 			}
232 		    }
233 		}
234 		else if ((ch = tbl[comp]) >= 0) {
235 		    matches++;
236 		    if ((UV)ch != pch) {
237 		        d = uvchr_to_utf8(d, ch);
238 		        pch = (UV)ch;
239 		    }
240 		    s += len;
241 		    continue;
242 		}
243 		else if (ch == -1) {	/* -1 is unmapped character */
244 		    Move(s, d, len, U8);
245 		    d += len;
246 		}
247 		else if (ch == -2)      /* -2 is delete character */
248 		    matches++;
249 		s += len;
250 		pch = 0xfeedface;
251 	    }
252 	}
253 	else {
254 	    while (s < send) {
255 		STRLEN len;
256 		const UV comp = utf8n_to_uvchr(s, send - s, &len,
257 					       UTF8_ALLOW_DEFAULT);
258 		I32 ch;
259 		if (comp > 0xff) {
260 		    if (!complement) {
261 			Move(s, d, len, U8);
262 			d += len;
263 		    }
264 		    else {
265 			matches++;
266 			if (!del) {
267 			    if (comp - 0x100 < rlen)
268 				d = uvchr_to_utf8(d, tbl[comp+1]);
269 			    else
270 				d = uvchr_to_utf8(d, tbl[0x100+rlen]);
271 			}
272 		    }
273 		}
274 		else if ((ch = tbl[comp]) >= 0) {
275 		    d = uvchr_to_utf8(d, ch);
276 		    matches++;
277 		}
278 		else if (ch == -1) {	/* -1 is unmapped character */
279 		    Move(s, d, len, U8);
280 		    d += len;
281 		}
282 		else if (ch == -2)      /* -2 is delete character */
283 		    matches++;
284 		s += len;
285 	    }
286 	}
287 	if (grows) {
288 	    sv_setpvn(sv, (char*)dstart, d - dstart);
289 	    Safefree(dstart);
290 	}
291 	else {
292 	    *d = '\0';
293 	    SvCUR_set(sv, d - dstart);
294 	}
295 	SvUTF8_on(sv);
296     }
297     SvSETMAGIC(sv);
298     return matches;
299 }
300 
301 STATIC I32
302 S_do_trans_simple_utf8(pTHX_ SV * const sv)
303 {
304     dVAR;
305     U8 *s;
306     U8 *send;
307     U8 *d;
308     U8 *start;
309     U8 *dstart, *dend;
310     I32 matches = 0;
311     const I32 grows = PL_op->op_private & OPpTRANS_GROWS;
312     STRLEN len;
313     SV* const  rv =
314 #ifdef USE_ITHREADS
315 		    PAD_SVl(cPADOP->op_padix);
316 #else
317 		    MUTABLE_SV(cSVOP->op_sv);
318 #endif
319     HV* const  hv = MUTABLE_HV(SvRV(rv));
320     SV* const * svp = hv_fetchs(hv, "NONE", FALSE);
321     const UV none = svp ? SvUV(*svp) : 0x7fffffff;
322     const UV extra = none + 1;
323     UV final = 0;
324     U8 hibit = 0;
325 
326     PERL_ARGS_ASSERT_DO_TRANS_SIMPLE_UTF8;
327 
328     s = (U8*)SvPV_nomg(sv, len);
329     if (!SvUTF8(sv)) {
330 	const U8 *t = s;
331 	const U8 * const e = s + len;
332 	while (t < e) {
333 	    const U8 ch = *t++;
334 	    hibit = !NATIVE_BYTE_IS_INVARIANT(ch);
335 	    if (hibit) {
336 		s = bytes_to_utf8(s, &len);
337 		break;
338 	    }
339 	}
340     }
341     send = s + len;
342     start = s;
343 
344     svp = hv_fetchs(hv, "FINAL", FALSE);
345     if (svp)
346 	final = SvUV(*svp);
347 
348     if (grows) {
349 	/* d needs to be bigger than s, in case e.g. upgrading is required */
350 	Newx(d, len * 3 + UTF8_MAXBYTES, U8);
351 	dend = d + len * 3;
352 	dstart = d;
353     }
354     else {
355 	dstart = d = s;
356 	dend = d + len;
357     }
358 
359     while (s < send) {
360 	const UV uv = swash_fetch(rv, s, TRUE);
361 	if (uv < none) {
362 	    s += UTF8SKIP(s);
363 	    matches++;
364 	    d = uvchr_to_utf8(d, uv);
365 	}
366 	else if (uv == none) {
367 	    const int i = UTF8SKIP(s);
368 	    Move(s, d, i, U8);
369 	    d += i;
370 	    s += i;
371 	}
372 	else if (uv == extra) {
373 	    s += UTF8SKIP(s);
374 	    matches++;
375 	    d = uvchr_to_utf8(d, final);
376 	}
377 	else
378 	    s += UTF8SKIP(s);
379 
380 	if (d > dend) {
381 	    const STRLEN clen = d - dstart;
382 	    const STRLEN nlen = dend - dstart + len + UTF8_MAXBYTES;
383 	    if (!grows)
384 		Perl_croak(aTHX_ "panic: do_trans_simple_utf8 line %d",__LINE__);
385 	    Renew(dstart, nlen + UTF8_MAXBYTES, U8);
386 	    d = dstart + clen;
387 	    dend = dstart + nlen;
388 	}
389     }
390     if (grows || hibit) {
391 	sv_setpvn(sv, (char*)dstart, d - dstart);
392 	Safefree(dstart);
393 	if (grows && hibit)
394 	    Safefree(start);
395     }
396     else {
397 	*d = '\0';
398 	SvCUR_set(sv, d - dstart);
399     }
400     SvSETMAGIC(sv);
401     SvUTF8_on(sv);
402 
403     return matches;
404 }
405 
406 STATIC I32
407 S_do_trans_count_utf8(pTHX_ SV * const sv)
408 {
409     dVAR;
410     const U8 *s;
411     const U8 *start = NULL;
412     const U8 *send;
413     I32 matches = 0;
414     STRLEN len;
415     SV* const  rv =
416 #ifdef USE_ITHREADS
417 		    PAD_SVl(cPADOP->op_padix);
418 #else
419 		    MUTABLE_SV(cSVOP->op_sv);
420 #endif
421     HV* const hv = MUTABLE_HV(SvRV(rv));
422     SV* const * const svp = hv_fetchs(hv, "NONE", FALSE);
423     const UV none = svp ? SvUV(*svp) : 0x7fffffff;
424     const UV extra = none + 1;
425     U8 hibit = 0;
426 
427     PERL_ARGS_ASSERT_DO_TRANS_COUNT_UTF8;
428 
429     s = (const U8*)SvPV_nomg_const(sv, len);
430     if (!SvUTF8(sv)) {
431 	const U8 *t = s;
432 	const U8 * const e = s + len;
433 	while (t < e) {
434 	    const U8 ch = *t++;
435 	    hibit = !NATIVE_BYTE_IS_INVARIANT(ch);
436 	    if (hibit) {
437 		start = s = bytes_to_utf8(s, &len);
438 		break;
439 	    }
440 	}
441     }
442     send = s + len;
443 
444     while (s < send) {
445 	const UV uv = swash_fetch(rv, s, TRUE);
446 	if (uv < none || uv == extra)
447 	    matches++;
448 	s += UTF8SKIP(s);
449     }
450     if (hibit)
451         Safefree(start);
452 
453     return matches;
454 }
455 
456 STATIC I32
457 S_do_trans_complex_utf8(pTHX_ SV * const sv)
458 {
459     dVAR;
460     U8 *start, *send;
461     U8 *d;
462     I32 matches = 0;
463     const I32 squash   = PL_op->op_private & OPpTRANS_SQUASH;
464     const I32 del      = PL_op->op_private & OPpTRANS_DELETE;
465     const I32 grows    = PL_op->op_private & OPpTRANS_GROWS;
466     SV* const  rv =
467 #ifdef USE_ITHREADS
468 		    PAD_SVl(cPADOP->op_padix);
469 #else
470 		    MUTABLE_SV(cSVOP->op_sv);
471 #endif
472     HV * const hv = MUTABLE_HV(SvRV(rv));
473     SV * const *svp = hv_fetchs(hv, "NONE", FALSE);
474     const UV none = svp ? SvUV(*svp) : 0x7fffffff;
475     const UV extra = none + 1;
476     UV final = 0;
477     bool havefinal = FALSE;
478     STRLEN len;
479     U8 *dstart, *dend;
480     U8 hibit = 0;
481     U8 *s = (U8*)SvPV_nomg(sv, len);
482 
483     PERL_ARGS_ASSERT_DO_TRANS_COMPLEX_UTF8;
484 
485     if (!SvUTF8(sv)) {
486 	const U8 *t = s;
487 	const U8 * const e = s + len;
488 	while (t < e) {
489 	    const U8 ch = *t++;
490 	    hibit = !NATIVE_BYTE_IS_INVARIANT(ch);
491 	    if (hibit) {
492 		s = bytes_to_utf8(s, &len);
493 		break;
494 	    }
495 	}
496     }
497     send = s + len;
498     start = s;
499 
500     svp = hv_fetchs(hv, "FINAL", FALSE);
501     if (svp) {
502 	final = SvUV(*svp);
503 	havefinal = TRUE;
504     }
505 
506     if (grows) {
507 	/* d needs to be bigger than s, in case e.g. upgrading is required */
508 	Newx(d, len * 3 + UTF8_MAXBYTES, U8);
509 	dend = d + len * 3;
510 	dstart = d;
511     }
512     else {
513 	dstart = d = s;
514 	dend = d + len;
515     }
516 
517     if (squash) {
518 	UV puv = 0xfeedface;
519 	while (s < send) {
520 	    UV uv = swash_fetch(rv, s, TRUE);
521 
522 	    if (d > dend) {
523 		const STRLEN clen = d - dstart;
524 		const STRLEN nlen = dend - dstart + len + UTF8_MAXBYTES;
525 		if (!grows)
526 		    Perl_croak(aTHX_ "panic: do_trans_complex_utf8 line %d",__LINE__);
527 		Renew(dstart, nlen + UTF8_MAXBYTES, U8);
528 		d = dstart + clen;
529 		dend = dstart + nlen;
530 	    }
531 	    if (uv < none) {
532 		matches++;
533 		s += UTF8SKIP(s);
534 		if (uv != puv) {
535 		    d = uvchr_to_utf8(d, uv);
536 		    puv = uv;
537 		}
538 		continue;
539 	    }
540 	    else if (uv == none) {	/* "none" is unmapped character */
541 		const int i = UTF8SKIP(s);
542 		Move(s, d, i, U8);
543 		d += i;
544 		s += i;
545 		puv = 0xfeedface;
546 		continue;
547 	    }
548 	    else if (uv == extra && !del) {
549 		matches++;
550 		if (havefinal) {
551 		    s += UTF8SKIP(s);
552 		    if (puv != final) {
553 			d = uvchr_to_utf8(d, final);
554 			puv = final;
555 		    }
556 		}
557 		else {
558 		    STRLEN len;
559 		    uv = utf8n_to_uvchr(s, send - s, &len, UTF8_ALLOW_DEFAULT);
560 		    if (uv != puv) {
561 			Move(s, d, len, U8);
562 			d += len;
563 			puv = uv;
564 		    }
565 		    s += len;
566 		}
567 		continue;
568 	    }
569 	    matches++;			/* "none+1" is delete character */
570 	    s += UTF8SKIP(s);
571 	}
572     }
573     else {
574 	while (s < send) {
575 	    const UV uv = swash_fetch(rv, s, TRUE);
576 	    if (d > dend) {
577 	        const STRLEN clen = d - dstart;
578 		const STRLEN nlen = dend - dstart + len + UTF8_MAXBYTES;
579 		if (!grows)
580 		    Perl_croak(aTHX_ "panic: do_trans_complex_utf8 line %d",__LINE__);
581 		Renew(dstart, nlen + UTF8_MAXBYTES, U8);
582 		d = dstart + clen;
583 		dend = dstart + nlen;
584 	    }
585 	    if (uv < none) {
586 		matches++;
587 		s += UTF8SKIP(s);
588 		d = uvchr_to_utf8(d, uv);
589 		continue;
590 	    }
591 	    else if (uv == none) {	/* "none" is unmapped character */
592 		const int i = UTF8SKIP(s);
593 		Move(s, d, i, U8);
594 		d += i;
595 		s += i;
596 		continue;
597 	    }
598 	    else if (uv == extra && !del) {
599 		matches++;
600 		s += UTF8SKIP(s);
601 		d = uvchr_to_utf8(d, final);
602 		continue;
603 	    }
604 	    matches++;			/* "none+1" is delete character */
605 	    s += UTF8SKIP(s);
606 	}
607     }
608     if (grows || hibit) {
609 	sv_setpvn(sv, (char*)dstart, d - dstart);
610 	Safefree(dstart);
611 	if (grows && hibit)
612 	    Safefree(start);
613     }
614     else {
615 	*d = '\0';
616 	SvCUR_set(sv, d - dstart);
617     }
618     SvUTF8_on(sv);
619     SvSETMAGIC(sv);
620 
621     return matches;
622 }
623 
624 I32
625 Perl_do_trans(pTHX_ SV *sv)
626 {
627     dVAR;
628     STRLEN len;
629     const I32 hasutf = (PL_op->op_private &
630                     (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF));
631 
632     PERL_ARGS_ASSERT_DO_TRANS;
633 
634     if (SvREADONLY(sv) && !(PL_op->op_private & OPpTRANS_IDENTICAL)) {
635             Perl_croak_no_modify();
636     }
637     (void)SvPV_const(sv, len);
638     if (!len)
639 	return 0;
640     if (!(PL_op->op_private & OPpTRANS_IDENTICAL)) {
641 	if (!SvPOKp(sv) || SvTHINKFIRST(sv))
642 	    (void)SvPV_force_nomg(sv, len);
643 	(void)SvPOK_only_UTF8(sv);
644     }
645 
646     DEBUG_t( Perl_deb(aTHX_ "2.TBL\n"));
647 
648     switch (PL_op->op_private & ~hasutf & (
649 		OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF|OPpTRANS_IDENTICAL|
650 		OPpTRANS_SQUASH|OPpTRANS_DELETE|OPpTRANS_COMPLEMENT)) {
651     case 0:
652 	if (hasutf)
653 	    return do_trans_simple_utf8(sv);
654 	else
655 	    return do_trans_simple(sv);
656 
657     case OPpTRANS_IDENTICAL:
658     case OPpTRANS_IDENTICAL|OPpTRANS_COMPLEMENT:
659 	if (hasutf)
660 	    return do_trans_count_utf8(sv);
661 	else
662 	    return do_trans_count(sv);
663 
664     default:
665 	if (hasutf)
666 	    return do_trans_complex_utf8(sv);
667 	else
668 	    return do_trans_complex(sv);
669     }
670 }
671 
672 void
673 Perl_do_join(pTHX_ SV *sv, SV *delim, SV **mark, SV **sp)
674 {
675     dVAR;
676     SV ** const oldmark = mark;
677     I32 items = sp - mark;
678     STRLEN len;
679     STRLEN delimlen;
680 
681     PERL_ARGS_ASSERT_DO_JOIN;
682 
683     (void) SvPV_const(delim, delimlen); /* stringify and get the delimlen */
684     /* SvCUR assumes it's SvPOK() and woe betide you if it's not. */
685 
686     mark++;
687     len = (items > 0 ? (delimlen * (items - 1) ) : 0);
688     SvUPGRADE(sv, SVt_PV);
689     if (SvLEN(sv) < len + items) {	/* current length is way too short */
690 	while (items-- > 0) {
691 	    if (*mark && !SvGAMAGIC(*mark) && SvOK(*mark)) {
692 		STRLEN tmplen;
693 		SvPV_const(*mark, tmplen);
694 		len += tmplen;
695 	    }
696 	    mark++;
697 	}
698 	SvGROW(sv, len + 1);		/* so try to pre-extend */
699 
700 	mark = oldmark;
701 	items = sp - mark;
702 	++mark;
703     }
704 
705     sv_setpvs(sv, "");
706     /* sv_setpv retains old UTF8ness [perl #24846] */
707     SvUTF8_off(sv);
708 
709     if (TAINTING_get && SvMAGICAL(sv))
710 	SvTAINTED_off(sv);
711 
712     if (items-- > 0) {
713 	if (*mark)
714 	    sv_catsv(sv, *mark);
715 	mark++;
716     }
717 
718     if (delimlen) {
719 	for (; items > 0; items--,mark++) {
720 	    sv_catsv_nomg(sv,delim);
721 	    sv_catsv(sv,*mark);
722 	}
723     }
724     else {
725 	for (; items > 0; items--,mark++)
726 	    sv_catsv(sv,*mark);
727     }
728     SvSETMAGIC(sv);
729 }
730 
731 void
732 Perl_do_sprintf(pTHX_ SV *sv, I32 len, SV **sarg)
733 {
734     dVAR;
735     STRLEN patlen;
736     const char * const pat = SvPV_const(*sarg, patlen);
737     bool do_taint = FALSE;
738 
739     PERL_ARGS_ASSERT_DO_SPRINTF;
740 
741     if (SvTAINTED(*sarg))
742 	TAINT_PROPER(
743 		(PL_op && PL_op->op_type < OP_max)
744 		    ? (PL_op->op_type == OP_PRTF)
745 			? "printf"
746 			: PL_op_name[PL_op->op_type]
747 		    : "(unknown)"
748 	);
749     SvUTF8_off(sv);
750     if (DO_UTF8(*sarg))
751         SvUTF8_on(sv);
752     sv_vsetpvfn(sv, pat, patlen, NULL, sarg + 1, len - 1, &do_taint);
753     SvSETMAGIC(sv);
754     if (do_taint)
755 	SvTAINTED_on(sv);
756 }
757 
758 /* currently converts input to bytes if possible, but doesn't sweat failure */
759 UV
760 Perl_do_vecget(pTHX_ SV *sv, SSize_t offset, int size)
761 {
762     dVAR;
763     STRLEN srclen, len, uoffset, bitoffs = 0;
764     const I32 svpv_flags = ((PL_op->op_flags & OPf_MOD || LVRET)
765                                           ? SV_UNDEF_RETURNS_NULL : 0);
766     unsigned char *s = (unsigned char *)
767                             SvPV_flags(sv, srclen, (svpv_flags|SV_GMAGIC));
768     UV retnum = 0;
769 
770     if (!s) {
771       s = (unsigned char *)"";
772     }
773 
774     PERL_ARGS_ASSERT_DO_VECGET;
775 
776     if (offset < 0)
777 	return 0;
778     if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */
779 	Perl_croak(aTHX_ "Illegal number of bits in vec");
780 
781     if (SvUTF8(sv)) {
782 	(void) Perl_sv_utf8_downgrade(aTHX_ sv, TRUE);
783         /* PVX may have changed */
784         s = (unsigned char *) SvPV_flags(sv, srclen, svpv_flags);
785     }
786 
787     if (size < 8) {
788 	bitoffs = ((offset%8)*size)%8;
789 	uoffset = offset/(8/size);
790     }
791     else if (size > 8)
792 	uoffset = offset*(size/8);
793     else
794 	uoffset = offset;
795 
796     len = uoffset + (bitoffs + size + 7)/8;	/* required number of bytes */
797     if (len > srclen) {
798 	if (size <= 8)
799 	    retnum = 0;
800 	else {
801 	    if (size == 16) {
802 		if (uoffset >= srclen)
803 		    retnum = 0;
804 		else
805 		    retnum = (UV) s[uoffset] <<  8;
806 	    }
807 	    else if (size == 32) {
808 		if (uoffset >= srclen)
809 		    retnum = 0;
810 		else if (uoffset + 1 >= srclen)
811 		    retnum =
812 			((UV) s[uoffset    ] << 24);
813 		else if (uoffset + 2 >= srclen)
814 		    retnum =
815 			((UV) s[uoffset    ] << 24) +
816 			((UV) s[uoffset + 1] << 16);
817 		else
818 		    retnum =
819 			((UV) s[uoffset    ] << 24) +
820 			((UV) s[uoffset + 1] << 16) +
821 			(     s[uoffset + 2] <<  8);
822 	    }
823 #ifdef UV_IS_QUAD
824 	    else if (size == 64) {
825 		Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
826 			       "Bit vector size > 32 non-portable");
827 		if (uoffset >= srclen)
828 		    retnum = 0;
829 		else if (uoffset + 1 >= srclen)
830 		    retnum =
831 			(UV) s[uoffset     ] << 56;
832 		else if (uoffset + 2 >= srclen)
833 		    retnum =
834 			((UV) s[uoffset    ] << 56) +
835 			((UV) s[uoffset + 1] << 48);
836 		else if (uoffset + 3 >= srclen)
837 		    retnum =
838 			((UV) s[uoffset    ] << 56) +
839 			((UV) s[uoffset + 1] << 48) +
840 			((UV) s[uoffset + 2] << 40);
841 		else if (uoffset + 4 >= srclen)
842 		    retnum =
843 			((UV) s[uoffset    ] << 56) +
844 			((UV) s[uoffset + 1] << 48) +
845 			((UV) s[uoffset + 2] << 40) +
846 			((UV) s[uoffset + 3] << 32);
847 		else if (uoffset + 5 >= srclen)
848 		    retnum =
849 			((UV) s[uoffset    ] << 56) +
850 			((UV) s[uoffset + 1] << 48) +
851 			((UV) s[uoffset + 2] << 40) +
852 			((UV) s[uoffset + 3] << 32) +
853 			(     s[uoffset + 4] << 24);
854 		else if (uoffset + 6 >= srclen)
855 		    retnum =
856 			((UV) s[uoffset    ] << 56) +
857 			((UV) s[uoffset + 1] << 48) +
858 			((UV) s[uoffset + 2] << 40) +
859 			((UV) s[uoffset + 3] << 32) +
860 			((UV) s[uoffset + 4] << 24) +
861 			((UV) s[uoffset + 5] << 16);
862 		else
863 		    retnum =
864 			((UV) s[uoffset    ] << 56) +
865 			((UV) s[uoffset + 1] << 48) +
866 			((UV) s[uoffset + 2] << 40) +
867 			((UV) s[uoffset + 3] << 32) +
868 			((UV) s[uoffset + 4] << 24) +
869 			((UV) s[uoffset + 5] << 16) +
870 			(     s[uoffset + 6] <<  8);
871 	    }
872 #endif
873 	}
874     }
875     else if (size < 8)
876 	retnum = (s[uoffset] >> bitoffs) & ((1 << size) - 1);
877     else {
878 	if (size == 8)
879 	    retnum = s[uoffset];
880 	else if (size == 16)
881 	    retnum =
882 		((UV) s[uoffset] <<      8) +
883 		      s[uoffset + 1];
884 	else if (size == 32)
885 	    retnum =
886 		((UV) s[uoffset    ] << 24) +
887 		((UV) s[uoffset + 1] << 16) +
888 		(     s[uoffset + 2] <<  8) +
889 		      s[uoffset + 3];
890 #ifdef UV_IS_QUAD
891 	else if (size == 64) {
892 	    Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
893 			   "Bit vector size > 32 non-portable");
894 	    retnum =
895 		((UV) s[uoffset    ] << 56) +
896 		((UV) s[uoffset + 1] << 48) +
897 		((UV) s[uoffset + 2] << 40) +
898 		((UV) s[uoffset + 3] << 32) +
899 		((UV) s[uoffset + 4] << 24) +
900 		((UV) s[uoffset + 5] << 16) +
901 		(     s[uoffset + 6] <<  8) +
902 		      s[uoffset + 7];
903 	}
904 #endif
905     }
906 
907     return retnum;
908 }
909 
910 /* currently converts input to bytes if possible but doesn't sweat failures,
911  * although it does ensure that the string it clobbers is not marked as
912  * utf8-valid any more
913  */
914 void
915 Perl_do_vecset(pTHX_ SV *sv)
916 {
917     dVAR;
918     SSize_t offset, bitoffs = 0;
919     int size;
920     unsigned char *s;
921     UV lval;
922     I32 mask;
923     STRLEN targlen;
924     STRLEN len;
925     SV * const targ = LvTARG(sv);
926 
927     PERL_ARGS_ASSERT_DO_VECSET;
928 
929     if (!targ)
930 	return;
931     s = (unsigned char*)SvPV_force_flags(targ, targlen,
932                                          SV_GMAGIC | SV_UNDEF_RETURNS_NULL);
933     if (SvUTF8(targ)) {
934 	/* This is handled by the SvPOK_only below...
935 	if (!Perl_sv_utf8_downgrade(aTHX_ targ, TRUE))
936 	    SvUTF8_off(targ);
937 	 */
938 	(void) Perl_sv_utf8_downgrade(aTHX_ targ, TRUE);
939     }
940 
941     (void)SvPOK_only(targ);
942     lval = SvUV(sv);
943     offset = LvTARGOFF(sv);
944     if (offset < 0)
945 	Perl_croak(aTHX_ "Negative offset to vec in lvalue context");
946     size = LvTARGLEN(sv);
947     if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */
948 	Perl_croak(aTHX_ "Illegal number of bits in vec");
949 
950     if (size < 8) {
951 	bitoffs = ((offset%8)*size)%8;
952 	offset /= 8/size;
953     }
954     else if (size > 8)
955 	offset *= size/8;
956 
957     len = offset + (bitoffs + size + 7)/8;	/* required number of bytes */
958     if (len > targlen) {
959 	s = (unsigned char*)SvGROW(targ, len + 1);
960 	(void)memzero((char *)(s + targlen), len - targlen + 1);
961 	SvCUR_set(targ, len);
962     }
963 
964     if (size < 8) {
965 	mask = (1 << size) - 1;
966 	lval &= mask;
967 	s[offset] &= ~(mask << bitoffs);
968 	s[offset] |= lval << bitoffs;
969     }
970     else {
971 	if (size == 8)
972 	    s[offset  ] = (U8)( lval        & 0xff);
973 	else if (size == 16) {
974 	    s[offset  ] = (U8)((lval >>  8) & 0xff);
975 	    s[offset+1] = (U8)( lval        & 0xff);
976 	}
977 	else if (size == 32) {
978 	    s[offset  ] = (U8)((lval >> 24) & 0xff);
979 	    s[offset+1] = (U8)((lval >> 16) & 0xff);
980 	    s[offset+2] = (U8)((lval >>  8) & 0xff);
981 	    s[offset+3] = (U8)( lval        & 0xff);
982 	}
983 #ifdef UV_IS_QUAD
984 	else if (size == 64) {
985 	    Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
986 			   "Bit vector size > 32 non-portable");
987 	    s[offset  ] = (U8)((lval >> 56) & 0xff);
988 	    s[offset+1] = (U8)((lval >> 48) & 0xff);
989 	    s[offset+2] = (U8)((lval >> 40) & 0xff);
990 	    s[offset+3] = (U8)((lval >> 32) & 0xff);
991 	    s[offset+4] = (U8)((lval >> 24) & 0xff);
992 	    s[offset+5] = (U8)((lval >> 16) & 0xff);
993 	    s[offset+6] = (U8)((lval >>  8) & 0xff);
994 	    s[offset+7] = (U8)( lval        & 0xff);
995 	}
996 #endif
997     }
998     SvSETMAGIC(targ);
999 }
1000 
1001 void
1002 Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
1003 {
1004     dVAR;
1005 #ifdef LIBERAL
1006     long *dl;
1007     long *ll;
1008     long *rl;
1009 #endif
1010     char *dc;
1011     STRLEN leftlen;
1012     STRLEN rightlen;
1013     const char *lc;
1014     const char *rc;
1015     STRLEN len;
1016     STRLEN lensave;
1017     const char *lsave;
1018     const char *rsave;
1019     bool left_utf;
1020     bool right_utf;
1021     STRLEN needlen = 0;
1022 
1023     PERL_ARGS_ASSERT_DO_VOP;
1024 
1025     if (sv != left || (optype != OP_BIT_AND && !SvOK(sv)))
1026 	sv_setpvs(sv, "");	/* avoid undef warning on |= and ^= */
1027     if (sv == left) {
1028 	lsave = lc = SvPV_force_nomg(left, leftlen);
1029     }
1030     else {
1031 	lsave = lc = SvPV_nomg_const(left, leftlen);
1032 	SvPV_force_nomg_nolen(sv);
1033     }
1034     rsave = rc = SvPV_nomg_const(right, rightlen);
1035 
1036     /* This need to come after SvPV to ensure that string overloading has
1037        fired off.  */
1038 
1039     left_utf = DO_UTF8(left);
1040     right_utf = DO_UTF8(right);
1041 
1042     if (left_utf && !right_utf) {
1043 	/* Avoid triggering overloading again by using temporaries.
1044 	   Maybe there should be a variant of sv_utf8_upgrade that takes pvn
1045 	*/
1046 	right = newSVpvn_flags(rsave, rightlen, SVs_TEMP);
1047 	sv_utf8_upgrade(right);
1048 	rsave = rc = SvPV_nomg_const(right, rightlen);
1049 	right_utf = TRUE;
1050     }
1051     else if (!left_utf && right_utf) {
1052 	left = newSVpvn_flags(lsave, leftlen, SVs_TEMP);
1053 	sv_utf8_upgrade(left);
1054 	lsave = lc = SvPV_nomg_const(left, leftlen);
1055 	left_utf = TRUE;
1056     }
1057 
1058     len = leftlen < rightlen ? leftlen : rightlen;
1059     lensave = len;
1060     SvCUR_set(sv, len);
1061     (void)SvPOK_only(sv);
1062     if ((left_utf || right_utf) && (sv == left || sv == right)) {
1063 	needlen = optype == OP_BIT_AND ? len : leftlen + rightlen;
1064 	Newxz(dc, needlen + 1, char);
1065     }
1066     else if (SvOK(sv) || SvTYPE(sv) > SVt_PVMG) {
1067 	dc = SvPV_force_nomg_nolen(sv);
1068 	if (SvLEN(sv) < len + 1) {
1069 	    dc = SvGROW(sv, len + 1);
1070 	    (void)memzero(dc + SvCUR(sv), len - SvCUR(sv) + 1);
1071 	}
1072 	if (optype != OP_BIT_AND && (left_utf || right_utf))
1073 	    dc = SvGROW(sv, leftlen + rightlen + 1);
1074     }
1075     else {
1076 	needlen = optype == OP_BIT_AND
1077 		    ? len : (leftlen > rightlen ? leftlen : rightlen);
1078 	Newxz(dc, needlen + 1, char);
1079 	sv_usepvn_flags(sv, dc, needlen, SV_HAS_TRAILING_NUL);
1080 	dc = SvPVX(sv);		/* sv_usepvn() calls Renew() */
1081     }
1082     if (left_utf || right_utf) {
1083 	UV duc, luc, ruc;
1084 	char *dcorig = dc;
1085 	char *dcsave = NULL;
1086 	STRLEN lulen = leftlen;
1087 	STRLEN rulen = rightlen;
1088 	STRLEN ulen;
1089 
1090 	switch (optype) {
1091 	case OP_BIT_AND:
1092 	    while (lulen && rulen) {
1093 		luc = utf8n_to_uvchr((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV);
1094 		lc += ulen;
1095 		lulen -= ulen;
1096 		ruc = utf8n_to_uvchr((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV);
1097 		rc += ulen;
1098 		rulen -= ulen;
1099 		duc = luc & ruc;
1100 		dc = (char*)uvchr_to_utf8((U8*)dc, duc);
1101 	    }
1102 	    if (sv == left || sv == right)
1103 		(void)sv_usepvn(sv, dcorig, needlen);
1104 	    SvCUR_set(sv, dc - dcorig);
1105 	    break;
1106 	case OP_BIT_XOR:
1107 	    while (lulen && rulen) {
1108 		luc = utf8n_to_uvchr((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV);
1109 		lc += ulen;
1110 		lulen -= ulen;
1111 		ruc = utf8n_to_uvchr((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV);
1112 		rc += ulen;
1113 		rulen -= ulen;
1114 		duc = luc ^ ruc;
1115 		dc = (char*)uvchr_to_utf8((U8*)dc, duc);
1116 	    }
1117 	    goto mop_up_utf;
1118 	case OP_BIT_OR:
1119 	    while (lulen && rulen) {
1120 		luc = utf8n_to_uvchr((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV);
1121 		lc += ulen;
1122 		lulen -= ulen;
1123 		ruc = utf8n_to_uvchr((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV);
1124 		rc += ulen;
1125 		rulen -= ulen;
1126 		duc = luc | ruc;
1127 		dc = (char*)uvchr_to_utf8((U8*)dc, duc);
1128 	    }
1129 	  mop_up_utf:
1130 	    if (rulen)
1131 		dcsave = savepvn(rc, rulen);
1132 	    else if (lulen)
1133 		dcsave = savepvn(lc, lulen);
1134 	    if (sv == left || sv == right)
1135 		(void)sv_usepvn(sv, dcorig, needlen); /* uses Renew(); defaults to nomg */
1136 	    SvCUR_set(sv, dc - dcorig);
1137 	    if (rulen)
1138 		sv_catpvn_nomg(sv, dcsave, rulen);
1139 	    else if (lulen)
1140 		sv_catpvn_nomg(sv, dcsave, lulen);
1141 	    else
1142 		*SvEND(sv) = '\0';
1143 	    Safefree(dcsave);
1144 	    break;
1145 	default:
1146 	    if (sv == left || sv == right)
1147 		Safefree(dcorig);
1148 	    Perl_croak(aTHX_ "panic: do_vop called for op %u (%s)",
1149 			(unsigned)optype, PL_op_name[optype]);
1150 	}
1151 	SvUTF8_on(sv);
1152 	goto finish;
1153     }
1154     else
1155 #ifdef LIBERAL
1156     if (len >= sizeof(long)*4 &&
1157 	!((unsigned long)dc % sizeof(long)) &&
1158 	!((unsigned long)lc % sizeof(long)) &&
1159 	!((unsigned long)rc % sizeof(long)))	/* It's almost always aligned... */
1160     {
1161 	const STRLEN remainder = len % (sizeof(long)*4);
1162 	len /= (sizeof(long)*4);
1163 
1164 	dl = (long*)dc;
1165 	ll = (long*)lc;
1166 	rl = (long*)rc;
1167 
1168 	switch (optype) {
1169 	case OP_BIT_AND:
1170 	    while (len--) {
1171 		*dl++ = *ll++ & *rl++;
1172 		*dl++ = *ll++ & *rl++;
1173 		*dl++ = *ll++ & *rl++;
1174 		*dl++ = *ll++ & *rl++;
1175 	    }
1176 	    break;
1177 	case OP_BIT_XOR:
1178 	    while (len--) {
1179 		*dl++ = *ll++ ^ *rl++;
1180 		*dl++ = *ll++ ^ *rl++;
1181 		*dl++ = *ll++ ^ *rl++;
1182 		*dl++ = *ll++ ^ *rl++;
1183 	    }
1184 	    break;
1185 	case OP_BIT_OR:
1186 	    while (len--) {
1187 		*dl++ = *ll++ | *rl++;
1188 		*dl++ = *ll++ | *rl++;
1189 		*dl++ = *ll++ | *rl++;
1190 		*dl++ = *ll++ | *rl++;
1191 	    }
1192 	}
1193 
1194 	dc = (char*)dl;
1195 	lc = (char*)ll;
1196 	rc = (char*)rl;
1197 
1198 	len = remainder;
1199     }
1200 #endif
1201     {
1202 	switch (optype) {
1203 	case OP_BIT_AND:
1204 	    while (len--)
1205 		*dc++ = *lc++ & *rc++;
1206 	    *dc = '\0';
1207 	    break;
1208 	case OP_BIT_XOR:
1209 	    while (len--)
1210 		*dc++ = *lc++ ^ *rc++;
1211 	    goto mop_up;
1212 	case OP_BIT_OR:
1213 	    while (len--)
1214 		*dc++ = *lc++ | *rc++;
1215 	  mop_up:
1216 	    len = lensave;
1217 	    if (rightlen > len)
1218 		sv_catpvn_nomg(sv, rsave + len, rightlen - len);
1219 	    else if (leftlen > (STRLEN)len)
1220 		sv_catpvn_nomg(sv, lsave + len, leftlen - len);
1221 	    else
1222 		*SvEND(sv) = '\0';
1223 	    break;
1224 	}
1225     }
1226 finish:
1227     SvTAINT(sv);
1228 }
1229 
1230 OP *
1231 Perl_do_kv(pTHX)
1232 {
1233     dVAR;
1234     dSP;
1235     HV * const keys = MUTABLE_HV(POPs);
1236     HE *entry;
1237     const I32 gimme = GIMME_V;
1238     const I32 dokv =     (PL_op->op_type == OP_RV2HV || PL_op->op_type == OP_PADHV);
1239     /* op_type is OP_RKEYS/OP_RVALUES if pp_rkeys delegated to here */
1240     const I32 dokeys =   dokv || (PL_op->op_type == OP_KEYS || PL_op->op_type == OP_RKEYS);
1241     const I32 dovalues = dokv || (PL_op->op_type == OP_VALUES || PL_op->op_type == OP_RVALUES);
1242 
1243     (void)hv_iterinit(keys);	/* always reset iterator regardless */
1244 
1245     if (gimme == G_VOID)
1246 	RETURN;
1247 
1248     if (gimme == G_SCALAR) {
1249 	if (PL_op->op_flags & OPf_MOD || LVRET) {	/* lvalue */
1250 	    SV * const ret = sv_2mortal(newSV_type(SVt_PVLV));  /* Not TARG RT#67838 */
1251 	    sv_magic(ret, NULL, PERL_MAGIC_nkeys, NULL, 0);
1252 	    LvTYPE(ret) = 'k';
1253 	    LvTARG(ret) = SvREFCNT_inc_simple(keys);
1254 	    PUSHs(ret);
1255 	}
1256 	else {
1257 	    IV i;
1258 	    dTARGET;
1259 
1260 	    if (! SvTIED_mg((const SV *)keys, PERL_MAGIC_tied) ) {
1261 		i = HvUSEDKEYS(keys);
1262 	    }
1263 	    else {
1264 		i = 0;
1265 		while (hv_iternext(keys)) i++;
1266 	    }
1267 	    PUSHi( i );
1268 	}
1269 	RETURN;
1270     }
1271 
1272     EXTEND(SP, HvUSEDKEYS(keys) * (dokeys + dovalues));
1273 
1274     PUTBACK;	/* hv_iternext and hv_iterval might clobber stack_sp */
1275     while ((entry = hv_iternext(keys))) {
1276 	SPAGAIN;
1277 	if (dokeys) {
1278 	    SV* const sv = hv_iterkeysv(entry);
1279 	    XPUSHs(sv);	/* won't clobber stack_sp */
1280 	}
1281 	if (dovalues) {
1282 	    SV *tmpstr;
1283 	    PUTBACK;
1284 	    tmpstr = hv_iterval(keys,entry);
1285 	    DEBUG_H(Perl_sv_setpvf(aTHX_ tmpstr, "%lu%%%d=%lu",
1286 			    (unsigned long)HeHASH(entry),
1287 			    (int)HvMAX(keys)+1,
1288 			    (unsigned long)(HeHASH(entry) & HvMAX(keys))));
1289 	    SPAGAIN;
1290 	    XPUSHs(tmpstr);
1291 	}
1292 	PUTBACK;
1293     }
1294     return NORMAL;
1295 }
1296 
1297 /*
1298  * Local variables:
1299  * c-indentation-style: bsd
1300  * c-basic-offset: 4
1301  * indent-tabs-mode: nil
1302  * End:
1303  *
1304  * ex: set ts=8 sts=4 sw=4 et:
1305  */
1306