1#!./perl -T 2# tests whether tainting works with UTF-8 3 4BEGIN { 5 chdir 't' if -d 't'; 6 require './test.pl'; 7 set_up_inc('../lib'); 8} 9 10use strict; 11use Config; 12 13# How to identify taint when you see it 14sub any_tainted (@) { 15 not eval { join("",@_), kill 0; 1 }; 16} 17sub tainted ($) { 18 any_tainted @_; 19} 20 21plan(tests => 3*10 + 3*8 + 2*16 + 3); 22 23my $arg = $ENV{PATH}; # a tainted value 24use constant UTF8 => "\x{1234}"; 25 26*is_utf8 = \&utf8::is_utf8; 27 28for my $ary ([ascii => 'perl'], [latin1 => "\xB6"], [utf8 => "\x{100}"]) { 29 my $encode = $ary->[0]; 30 my $string = $ary->[1]; 31 32 my $taint = $arg; substr($taint, 0) = $ary->[1]; 33 34 is(tainted($taint), tainted($arg), "tainted: $encode, before test"); 35 36 my $lconcat = $taint; 37 $lconcat .= UTF8; 38 is($lconcat, $string.UTF8, "compare: $encode, concat left"); 39 40 is(tainted($lconcat), tainted($arg), "tainted: $encode, concat left"); 41 42 my $rconcat = UTF8; 43 $rconcat .= $taint; 44 is($rconcat, UTF8.$string, "compare: $encode, concat right"); 45 46 is(tainted($rconcat), tainted($arg), "tainted: $encode, concat right"); 47 48 my $ljoin = join('!', $taint, UTF8); 49 is($ljoin, join('!', $string, UTF8), "compare: $encode, join left"); 50 51 is(tainted($ljoin), tainted($arg), "tainted: $encode, join left"); 52 53 my $rjoin = join('!', UTF8, $taint); 54 is($rjoin, join('!', UTF8, $string), "compare: $encode, join right"); 55 56 is(tainted($rjoin), tainted($arg), "tainted: $encode, join right"); 57 58 is(tainted($taint), tainted($arg), "tainted: $encode, after test"); 59} 60 61 62for my $ary ([ascii => 'perl'], [latin1 => "\xB6"], [utf8 => "\x{100}"]) { 63 my $encode = $ary->[0]; 64 65 my $utf8 = pack('U*') . $ary->[1]; 66 my $byte = unpack('U0a*', $utf8); 67 68 my $taint = $arg; substr($taint, 0) = $utf8; 69 utf8::encode($taint); 70 71 is($taint, $byte, "compare: $encode, encode utf8"); 72 73 is(pack('a*',$taint), pack('a*',$byte), "bytecmp: $encode, encode utf8"); 74 75 ok(!is_utf8($taint), "is_utf8: $encode, encode utf8"); 76 77 is(tainted($taint), tainted($arg), "tainted: $encode, encode utf8"); 78 79 my $taint = $arg; substr($taint, 0) = $byte; 80 utf8::decode($taint); 81 82 is($taint, $utf8, "compare: $encode, decode byte"); 83 84 is(pack('a*',$taint), pack('a*',$utf8), "bytecmp: $encode, decode byte"); 85 86 is(is_utf8($taint), ($encode ne 'ascii'), "is_utf8: $encode, decode byte"); 87 88 is(tainted($taint), tainted($arg), "tainted: $encode, decode byte"); 89} 90 91 92for my $ary ([ascii => 'perl'], [latin1 => "\xB6"]) { 93 my $encode = $ary->[0]; 94 95 my $up = pack('U*') . $ary->[1]; 96 my $down = pack("a*", $ary->[1]); 97 98 my $taint = $arg; substr($taint, 0) = $up; 99 utf8::upgrade($taint); 100 101 is($taint, $up, "compare: $encode, upgrade up"); 102 103 is(pack('a*',$taint), pack('a*',$up), "bytecmp: $encode, upgrade up"); 104 105 ok(is_utf8($taint), "is_utf8: $encode, upgrade up"); 106 107 is(tainted($taint), tainted($arg), "tainted: $encode, upgrade up"); 108 109 my $taint = $arg; substr($taint, 0) = $down; 110 utf8::upgrade($taint); 111 112 is($taint, $up, "compare: $encode, upgrade down"); 113 114 is(pack('a*',$taint), pack('a*',$up), "bytecmp: $encode, upgrade down"); 115 116 ok(is_utf8($taint), "is_utf8: $encode, upgrade down"); 117 118 is(tainted($taint), tainted($arg), "tainted: $encode, upgrade down"); 119 120 my $taint = $arg; substr($taint, 0) = $up; 121 utf8::downgrade($taint); 122 123 is($taint, $down, "compare: $encode, downgrade up"); 124 125 is(pack('a*',$taint), pack('a*',$down), "bytecmp: $encode, downgrade up"); 126 127 ok(!is_utf8($taint), "is_utf8: $encode, downgrade up"); 128 129 is(tainted($taint), tainted($arg), "tainted: $encode, downgrade up"); 130 131 my $taint = $arg; substr($taint, 0) = $down; 132 utf8::downgrade($taint); 133 134 is($taint, $down, "compare: $encode, downgrade down"); 135 136 is(pack('a*',$taint), pack('a*',$down), "bytecmp: $encode, downgrade down"); 137 138 ok(!is_utf8($taint), "is_utf8: $encode, downgrade down"); 139 140 is(tainted($taint), tainted($arg), "tainted: $encode, downgrade down"); 141} 142 143SKIP: { 144 if (is_miniperl()) { 145 skip_if_miniperl("Unicode tables not built yet", 2) 146 unless eval 'require "unicore/UCD.pl"'; 147 } 148 fresh_perl_is('$a = substr $^X, 0, 0; /\x{100}/i; /$a\x{100}/i || print q,ok,', 149 'ok', {switches => ["-T", "-l"]}, 150 "matching a regexp is taint agnostic"); 151 152 fresh_perl_is('$a = substr $^X, 0, 0; /$a\x{100}/i || print q,ok,', 153 'ok', {switches => ["-T", "-l"]}, 154 "therefore swash_init should be taint agnostic"); 155} 156 157{ 158 # RT #122148: s///e on tainted utf8 strings got pos() messed up in 5.20 159 160 my @p; 161 my $s = "\x{100}\x{100}\x{100}\x{100}". $^X; 162 $s =~ s/\x{100}/push @p, pos($s); "xxxx";/eg; 163 is("@p", "0 1 2 3", "RT #122148"); 164} 165