xref: /openbsd/gnu/usr.bin/perl/t/re/subst.t (revision cca36db2)
1#!./perl -w
2
3BEGIN {
4    chdir 't' if -d 't';
5    @INC = '../lib';
6    require Config; import Config;
7}
8
9require './test.pl';
10plan( tests => 143 );
11
12$x = 'foo';
13$_ = "x";
14s/x/\$x/;
15ok( $_ eq '$x', ":$_: eq :\$x:" );
16
17$_ = "x";
18s/x/$x/;
19ok( $_ eq 'foo', ":$_: eq :foo:" );
20
21$_ = "x";
22s/x/\$x $x/;
23ok( $_ eq '$x foo', ":$_: eq :\$x foo:" );
24
25$b = 'cd';
26($a = 'abcdef') =~ s<(b${b}e)>'\n$1';
27ok( $1 eq 'bcde' && $a eq 'a\n$1f', ":$1: eq :bcde: ; :$a: eq :a\\n\$1f:" );
28
29$a = 'abacada';
30ok( ($a =~ s/a/x/g) == 4 && $a eq 'xbxcxdx' );
31
32ok( ($a =~ s/a/y/g) == 0 && $a eq 'xbxcxdx' );
33
34ok( ($a =~ s/b/y/g) == 1 && $a eq 'xyxcxdx' );
35
36$_ = 'ABACADA';
37ok( /a/i && s///gi && $_ eq 'BCD' );
38
39$_ = '\\' x 4;
40ok( length($_) == 4 );
41$snum = s/\\/\\\\/g;
42ok( $_ eq '\\' x 8 && $snum == 4 );
43
44$_ = '\/' x 4;
45ok( length($_) == 8 );
46$snum = s/\//\/\//g;
47ok( $_ eq '\\//' x 4 && $snum == 4 );
48ok( length($_) == 12 );
49
50$_ = 'aaaXXXXbbb';
51s/^a//;
52ok( $_ eq 'aaXXXXbbb' );
53
54$_ = 'aaaXXXXbbb';
55s/a//;
56ok( $_ eq 'aaXXXXbbb' );
57
58$_ = 'aaaXXXXbbb';
59s/^a/b/;
60ok( $_ eq 'baaXXXXbbb' );
61
62$_ = 'aaaXXXXbbb';
63s/a/b/;
64ok( $_ eq 'baaXXXXbbb' );
65
66$_ = 'aaaXXXXbbb';
67s/aa//;
68ok( $_ eq 'aXXXXbbb' );
69
70$_ = 'aaaXXXXbbb';
71s/aa/b/;
72ok( $_ eq 'baXXXXbbb' );
73
74$_ = 'aaaXXXXbbb';
75s/b$//;
76ok( $_ eq 'aaaXXXXbb' );
77
78$_ = 'aaaXXXXbbb';
79s/b//;
80ok( $_ eq 'aaaXXXXbb' );
81
82$_ = 'aaaXXXXbbb';
83s/bb//;
84ok( $_ eq 'aaaXXXXb' );
85
86$_ = 'aaaXXXXbbb';
87s/aX/y/;
88ok( $_ eq 'aayXXXbbb' );
89
90$_ = 'aaaXXXXbbb';
91s/Xb/z/;
92ok( $_ eq 'aaaXXXzbb' );
93
94$_ = 'aaaXXXXbbb';
95s/aaX.*Xbb//;
96ok( $_ eq 'ab' );
97
98$_ = 'aaaXXXXbbb';
99s/bb/x/;
100ok( $_ eq 'aaaXXXXxb' );
101
102# now for some unoptimized versions of the same.
103
104$_ = 'aaaXXXXbbb';
105$x ne $x || s/^a//;
106ok( $_ eq 'aaXXXXbbb' );
107
108$_ = 'aaaXXXXbbb';
109$x ne $x || s/a//;
110ok( $_ eq 'aaXXXXbbb' );
111
112$_ = 'aaaXXXXbbb';
113$x ne $x || s/^a/b/;
114ok( $_ eq 'baaXXXXbbb' );
115
116$_ = 'aaaXXXXbbb';
117$x ne $x || s/a/b/;
118ok( $_ eq 'baaXXXXbbb' );
119
120$_ = 'aaaXXXXbbb';
121$x ne $x || s/aa//;
122ok( $_ eq 'aXXXXbbb' );
123
124$_ = 'aaaXXXXbbb';
125$x ne $x || s/aa/b/;
126ok( $_ eq 'baXXXXbbb' );
127
128$_ = 'aaaXXXXbbb';
129$x ne $x || s/b$//;
130ok( $_ eq 'aaaXXXXbb' );
131
132$_ = 'aaaXXXXbbb';
133$x ne $x || s/b//;
134ok( $_ eq 'aaaXXXXbb' );
135
136$_ = 'aaaXXXXbbb';
137$x ne $x || s/bb//;
138ok( $_ eq 'aaaXXXXb' );
139
140$_ = 'aaaXXXXbbb';
141$x ne $x || s/aX/y/;
142ok( $_ eq 'aayXXXbbb' );
143
144$_ = 'aaaXXXXbbb';
145$x ne $x || s/Xb/z/;
146ok( $_ eq 'aaaXXXzbb' );
147
148$_ = 'aaaXXXXbbb';
149$x ne $x || s/aaX.*Xbb//;
150ok( $_ eq 'ab' );
151
152$_ = 'aaaXXXXbbb';
153$x ne $x || s/bb/x/;
154ok( $_ eq 'aaaXXXXxb' );
155
156$_ = 'abc123xyz';
157s/(\d+)/$1*2/e;              # yields 'abc246xyz'
158ok( $_ eq 'abc246xyz' );
159s/(\d+)/sprintf("%5d",$1)/e; # yields 'abc  246xyz'
160ok( $_ eq 'abc  246xyz' );
161s/(\w)/$1 x 2/eg;            # yields 'aabbcc  224466xxyyzz'
162ok( $_ eq 'aabbcc  224466xxyyzz' );
163
164$_ = "aaaaa";
165ok( y/a/b/ == 5 );
166ok( y/a/b/ == 0 );
167ok( y/b// == 5 );
168ok( y/b/c/s == 5 );
169ok( y/c// == 1 );
170ok( y/c//d == 1 );
171ok( $_ eq "" );
172
173$_ = "Now is the %#*! time for all good men...";
174ok( ($x=(y/a-zA-Z //cd)) == 7 );
175ok( y/ / /s == 8 );
176
177$_ = 'abcdefghijklmnopqrstuvwxyz0123456789';
178tr/a-z/A-Z/;
179
180ok( $_ eq 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789' );
181
182# same as tr/A-Z/a-z/;
183if (defined $Config{ebcdic} && $Config{ebcdic} eq 'define') {	# EBCDIC.
184    no utf8;
185    y[\301-\351][\201-\251];
186} else {		# Ye Olde ASCII.  Or something like it.
187    y[\101-\132][\141-\172];
188}
189
190ok( $_ eq 'abcdefghijklmnopqrstuvwxyz0123456789' );
191
192SKIP: {
193    skip("not ASCII",1) unless (ord("+") == ord(",") - 1
194			     && ord(",") == ord("-") - 1
195			     && ord("a") == ord("b") - 1
196			     && ord("b") == ord("c") - 1);
197    $_ = '+,-';
198    tr/+--/a-c/;
199    ok( $_ eq 'abc' );
200}
201
202$_ = '+,-';
203tr/+\--/a\/c/;
204ok( $_ eq 'a,/' );
205
206$_ = '+,-';
207tr/-+,/ab\-/;
208ok( $_ eq 'b-a' );
209
210
211# test recursive substitutions
212# code based on the recursive expansion of makefile variables
213
214my %MK = (
215    AAAAA => '$(B)', B=>'$(C)', C => 'D',			# long->short
216    E     => '$(F)', F=>'p $(G) q', G => 'HHHHH',	# short->long
217    DIR => '$(UNDEFINEDNAME)/xxx',
218);
219sub var {
220    my($var,$level) = @_;
221    return "\$($var)" unless exists $MK{$var};
222    return exp_vars($MK{$var}, $level+1); # can recurse
223}
224sub exp_vars {
225    my($str,$level) = @_;
226    $str =~ s/\$\((\w+)\)/var($1, $level+1)/ge; # can recurse
227    #warn "exp_vars $level = '$str'\n";
228    $str;
229}
230
231ok( exp_vars('$(AAAAA)',0)           eq 'D' );
232ok( exp_vars('$(E)',0)               eq 'p HHHHH q' );
233ok( exp_vars('$(DIR)',0)             eq '$(UNDEFINEDNAME)/xxx' );
234ok( exp_vars('foo $(DIR)/yyy bar',0) eq 'foo $(UNDEFINEDNAME)/xxx/yyy bar' );
235
236$_ = "abcd";
237s/(..)/$x = $1, m#.#/eg;
238ok( $x eq "cd", 'a match nested in the RHS of a substitution' );
239
240# Subst and lookbehind
241
242$_="ccccc";
243$snum = s/(?<!x)c/x/g;
244ok( $_ eq "xxxxx" && $snum == 5 );
245
246$_="ccccc";
247$snum = s/(?<!x)(c)/x/g;
248ok( $_ eq "xxxxx" && $snum == 5 );
249
250$_="foobbarfoobbar";
251$snum = s/(?<!r)foobbar/foobar/g;
252ok( $_ eq "foobarfoobbar" && $snum == 1 );
253
254$_="foobbarfoobbar";
255$snum = s/(?<!ar)(foobbar)/foobar/g;
256ok( $_ eq "foobarfoobbar" && $snum == 1 );
257
258$_="foobbarfoobbar";
259$snum = s/(?<!ar)foobbar/foobar/g;
260ok( $_ eq "foobarfoobbar" && $snum == 1 );
261
262eval 's{foo} # this is a comment, not a delimiter
263       {bar};';
264ok( ! @?, 'parsing of split subst with comment' );
265
266$snum = eval '$_="exactly"; s sxsys;m 3(yactl)3;$1';
267is( $snum, 'yactl', 'alpha delimiters are allowed' );
268
269$_="baacbaa";
270$snum = tr/a/b/s;
271ok( $_ eq "bbcbb" && $snum == 4,
272    'check if squashing works at the end of string' );
273
274$_ = "ab";
275ok( s/a/b/ == 1 );
276
277$_ = <<'EOL';
278     $url = new URI::URL "http://www/";   die if $url eq "xXx";
279EOL
280$^R = 'junk';
281
282$foo = ' $@%#lowercase $@%# lowercase UPPERCASE$@%#UPPERCASE' .
283  ' $@%#lowercase$@%#lowercase$@%# lowercase lowercase $@%#lowercase' .
284  ' lowercase $@%#MiXeD$@%# ';
285
286$snum =
287s{  \d+          \b [,.;]? (?{ 'digits' })
288   |
289    [a-z]+       \b [,.;]? (?{ 'lowercase' })
290   |
291    [A-Z]+       \b [,.;]? (?{ 'UPPERCASE' })
292   |
293    [A-Z] [a-z]+ \b [,.;]? (?{ 'Capitalized' })
294   |
295    [A-Za-z]+    \b [,.;]? (?{ 'MiXeD' })
296   |
297    [A-Za-z0-9]+ \b [,.;]? (?{ 'alphanumeric' })
298   |
299    \s+                    (?{ ' ' })
300   |
301    [^A-Za-z0-9\s]+          (?{ '$@%#' })
302}{$^R}xg;
303ok( $_ eq $foo );
304ok( $snum == 31 );
305
306$_ = 'a' x 6;
307$snum = s/a(?{})//g;
308ok( $_ eq '' && $snum == 6 );
309
310$_ = 'x' x 20;
311$snum = s/(\d*|x)/<$1>/g;
312$foo = '<>' . ('<x><>' x 20) ;
313ok( $_ eq $foo && $snum == 41 );
314
315$t = 'aaaaaaaaa';
316
317$_ = $t;
318pos = 6;
319$snum = s/\Ga/xx/g;
320ok( $_ eq 'aaaaaaxxxxxx' && $snum == 3 );
321
322$_ = $t;
323pos = 6;
324$snum = s/\Ga/x/g;
325ok( $_ eq 'aaaaaaxxx' && $snum == 3 );
326
327$_ = $t;
328pos = 6;
329s/\Ga/xx/;
330ok( $_ eq 'aaaaaaxxaa' );
331
332$_ = $t;
333pos = 6;
334s/\Ga/x/;
335ok( $_ eq 'aaaaaaxaa' );
336
337$_ = $t;
338$snum = s/\Ga/xx/g;
339ok( $_ eq 'xxxxxxxxxxxxxxxxxx' && $snum == 9 );
340
341$_ = $t;
342$snum = s/\Ga/x/g;
343ok( $_ eq 'xxxxxxxxx' && $snum == 9 );
344
345$_ = $t;
346s/\Ga/xx/;
347ok( $_ eq 'xxaaaaaaaa' );
348
349$_ = $t;
350s/\Ga/x/;
351ok( $_ eq 'xaaaaaaaa' );
352
353$_ = 'aaaa';
354$snum = s/\ba/./g;
355ok( $_ eq '.aaa' && $snum == 1 );
356
357eval q% s/a/"b"}/e %;
358ok( $@ =~ /Bad evalled substitution/ );
359eval q% ($_ = "x") =~ s/(.)/"$1 "/e %;
360ok( $_ eq "x " and !length $@ );
361$x = $x = 'interp';
362eval q% ($_ = "x") =~ s/x(($x)*)/"$1"/e %;
363ok( $_ eq '' and !length $@ );
364
365$_ = "C:/";
366ok( !s/^([a-z]:)/\u$1/ );
367
368$_ = "Charles Bronson";
369$snum = s/\B\w//g;
370ok( $_ eq "C B" && $snum == 12 );
371
372{
373    use utf8;
374    my $s = "H\303\266he";
375    my $l = my $r = $s;
376    $l =~ s/[^\w]//g;
377    $r =~ s/[^\w\.]//g;
378    is($l, $r, "use utf8 \\w");
379}
380
381my $pv1 = my $pv2  = "Andreas J. K\303\266nig";
382$pv1 =~ s/A/\x{100}/;
383substr($pv2,0,1) = "\x{100}";
384is($pv1, $pv2);
385
386SKIP: {
387    skip("EBCDIC", 3) if ord("A") == 193;
388
389    {
390	# Gregor Chrupala <gregor.chrupala@star-group.net>
391	use utf8;
392	$a = 'Espa&ntilde;a';
393	$a =~ s/&ntilde;/ñ/;
394	like($a, qr/ñ/, "use utf8 RHS");
395    }
396
397    {
398	use utf8;
399	$a = 'España España';
400	$a =~ s/ñ/&ntilde;/;
401	like($a, qr/ñ/, "use utf8 LHS");
402    }
403
404    {
405	use utf8;
406	$a = 'España';
407	$a =~ s/ñ/ñ/;
408	like($a, qr/ñ/, "use utf8 LHS and RHS");
409    }
410}
411
412{
413    # SADAHIRO Tomoyuki <bqw10602@nifty.com>
414
415    $a = "\x{100}\x{101}";
416    $a =~ s/\x{101}/\xFF/;
417    like($a, qr/\xFF/);
418    is(length($a), 2, "SADAHIRO utf8 s///");
419
420    $a = "\x{100}\x{101}";
421    $a =~ s/\x{101}/"\xFF"/e;
422    like($a, qr/\xFF/);
423    is(length($a), 2);
424
425    $a = "\x{100}\x{101}";
426    $a =~ s/\x{101}/\xFF\xFF\xFF/;
427    like($a, qr/\xFF\xFF\xFF/);
428    is(length($a), 4);
429
430    $a = "\x{100}\x{101}";
431    $a =~ s/\x{101}/"\xFF\xFF\xFF"/e;
432    like($a, qr/\xFF\xFF\xFF/);
433    is(length($a), 4);
434
435    $a = "\xFF\x{101}";
436    $a =~ s/\xFF/\x{100}/;
437    like($a, qr/\x{100}/);
438    is(length($a), 2);
439
440    $a = "\xFF\x{101}";
441    $a =~ s/\xFF/"\x{100}"/e;
442    like($a, qr/\x{100}/);
443    is(length($a), 2);
444
445    $a = "\xFF";
446    $a =~ s/\xFF/\x{100}/;
447    like($a, qr/\x{100}/);
448    is(length($a), 1);
449
450    $a = "\xFF";
451    $a =~ s/\xFF/"\x{100}"/e;
452    like($a, qr/\x{100}/);
453    is(length($a), 1);
454}
455
456{
457    # subst with mixed utf8/non-utf8 type
458    my($ua, $ub, $uc, $ud) = ("\x{101}", "\x{102}", "\x{103}", "\x{104}");
459    my($na, $nb) = ("\x{ff}", "\x{fe}");
460    my $a = "$ua--$ub";
461    my $b;
462    ($b = $a) =~ s/--/$na/;
463    is($b, "$ua$na$ub", "s///: replace non-utf8 into utf8");
464    ($b = $a) =~ s/--/--$na--/;
465    is($b, "$ua--$na--$ub", "s///: replace long non-utf8 into utf8");
466    ($b = $a) =~ s/--/$uc/;
467    is($b, "$ua$uc$ub", "s///: replace utf8 into utf8");
468    ($b = $a) =~ s/--/--$uc--/;
469    is($b, "$ua--$uc--$ub", "s///: replace long utf8 into utf8");
470    $a = "$na--$nb";
471    ($b = $a) =~ s/--/$ua/;
472    is($b, "$na$ua$nb", "s///: replace utf8 into non-utf8");
473    ($b = $a) =~ s/--/--$ua--/;
474    is($b, "$na--$ua--$nb", "s///: replace long utf8 into non-utf8");
475
476    # now with utf8 pattern
477    $a = "$ua--$ub";
478    ($b = $a) =~ s/-($ud)?-/$na/;
479    is($b, "$ua$na$ub", "s///: replace non-utf8 into utf8 (utf8 pattern)");
480    ($b = $a) =~ s/-($ud)?-/--$na--/;
481    is($b, "$ua--$na--$ub", "s///: replace long non-utf8 into utf8 (utf8 pattern)");
482    ($b = $a) =~ s/-($ud)?-/$uc/;
483    is($b, "$ua$uc$ub", "s///: replace utf8 into utf8 (utf8 pattern)");
484    ($b = $a) =~ s/-($ud)?-/--$uc--/;
485    is($b, "$ua--$uc--$ub", "s///: replace long utf8 into utf8 (utf8 pattern)");
486    $a = "$na--$nb";
487    ($b = $a) =~ s/-($ud)?-/$ua/;
488    is($b, "$na$ua$nb", "s///: replace utf8 into non-utf8 (utf8 pattern)");
489    ($b = $a) =~ s/-($ud)?-/--$ua--/;
490    is($b, "$na--$ua--$nb", "s///: replace long utf8 into non-utf8 (utf8 pattern)");
491    ($b = $a) =~ s/-($ud)?-/$na/;
492    is($b, "$na$na$nb", "s///: replace non-utf8 into non-utf8 (utf8 pattern)");
493    ($b = $a) =~ s/-($ud)?-/--$na--/;
494    is($b, "$na--$na--$nb", "s///: replace long non-utf8 into non-utf8 (utf8 pattern)");
495}
496
497$_ = 'aaaa';
498$r = 'x';
499$s = s/a(?{})/$r/g;
500is("<$_> <$s>", "<xxxx> <4>", "[perl #7806]");
501
502$_ = 'aaaa';
503$s = s/a(?{})//g;
504is("<$_> <$s>", "<> <4>", "[perl #7806]");
505
506# [perl #19048] Coredump in silly replacement
507{
508    local $^W = 0;
509    $_="abcdef\n";
510    s!.!!eg;
511    is($_, "\n", "[perl #19048]");
512}
513
514# [perl #17757] interaction between saw_ampersand and study
515{
516    my $f = eval q{ $& };
517    $f = "xx";
518    study $f;
519    $f =~ s/x/y/g;
520    is($f, "yy", "[perl #17757]");
521}
522
523# [perl #20684] returned a zero count
524$_ = "1111";
525is(s/(??{1})/2/eg, 4, '#20684 s/// with (??{..}) inside');
526
527# [perl #20682] @- not visible in replacement
528$_ = "123";
529/(2)/;	# seed @- with something else
530s/(1)(2)(3)/$#- (@-)/;
531is($_, "3 (0 0 1 2)", '#20682 @- not visible in replacement');
532
533# [perl #20682] $^N not visible in replacement
534$_ = "abc";
535/(a)/; s/(b)|(c)/-$^N/g;
536is($_,'a-b-c','#20682 $^N not visible in replacement');
537
538# [perl #22351] perl bug with 'e' substitution modifier
539my $name = "chris";
540{
541    no warnings 'uninitialized';
542    $name =~ s/hr//e;
543}
544is($name, "cis", q[#22351 bug with 'e' substitution modifier]);
545
546
547# [perl #34171] $1 didn't honour 'use bytes' in s//e
548{
549    my $s="\x{100}";
550    my $x;
551    {
552	use bytes;
553	$s=~ s/(..)/$x=$1/e
554    }
555    is(length($x), 2, '[perl #34171]');
556}
557
558
559{ # [perl #27940] perlbug: [\x00-\x1f] works, [\c@-\c_] does not
560    my $c;
561
562    ($c = "\x20\c@\x30\cA\x40\cZ\x50\c_\x60") =~ s/[\c@-\c_]//g;
563    is($c, "\x20\x30\x40\x50\x60", "s/[\\c\@-\\c_]//g");
564
565    ($c = "\x20\x00\x30\x01\x40\x1A\x50\x1F\x60") =~ s/[\x00-\x1f]//g;
566    is($c, "\x20\x30\x40\x50\x60", "s/[\\x00-\\x1f]//g");
567}
568{
569    $_ = "xy";
570    no warnings 'uninitialized';
571    /(((((((((x)))))))))(z)/;	# clear $10
572    s/(((((((((x)))))))))(y)/${10}/;
573    is($_,"y","RT#6006: \$_ eq '$_'");
574    $_ = "xr";
575    s/(((((((((x)))))))))(r)/fooba${10}/;
576    is($_,"foobar","RT#6006: \$_ eq '$_'");
577}
578{
579    my $want=("\n" x 11).("B\n" x 11)."B";
580    $_="B";
581    our $i;
582    for $i(1..11){
583	s/^.*$/$&/gm;
584	$_="\n$_\n$&";
585    }
586    is($want,$_,"RT#17542");
587}
588
589{
590    my @tests = ('ABC', "\xA3\xA4\xA5", "\x{410}\x{411}\x{412}");
591    foreach (@tests) {
592	my $id = ord $_;
593	s/./pos/ge;
594	is($_, "012", "RT#52104: $id");
595    }
596}
597
598fresh_perl_is( '$_=q(foo);s/(.)\G//g;print' => 'foo', '[perl #69056] positive GPOS regex segfault' );
599fresh_perl_is( '$_="abcef"; s/bc|(.)\G(.)/$1 ? "[$1-$2]" : "XX"/ge; print' => 'aXX[c-e][e-f]f', 'positive GPOS regex substitution failure' );
600
601# [perl #~~~~~] $var =~ s/$qr//e calling get-magic on $_ as well as $var
602{
603 local *_;
604 my $scratch;
605 sub qrBug::TIESCALAR { bless[pop], 'qrBug' }
606 sub qrBug::FETCH { $scratch .= "[fetching $_[0][0]]"; 'prew' }
607 sub qrBug::STORE{}
608 tie my $kror, qrBug => '$kror';
609 tie $_, qrBug => '$_';
610 my $qr = qr/(?:)/;
611 $kror =~ s/$qr/""/e;
612 is(
613   $scratch, '[fetching $kror]',
614  'bug: $var =~ s/$qr//e calling get-magic on $_ as well as $var',
615 );
616}
617