1#!./perl 2 3BEGIN { pop @INC if $INC[-1] eq '.' } 4use strict; 5use Encode; 6use Getopt::Std; 7my %Opt; getopts("ChH:e:f:t:s:pPv", \%Opt); 8$Opt{p} ||= $Opt{P}; 9$Opt{e} ||= 'utf8'; 10$Opt{f} ||= $Opt{e}; 11$Opt{t} ||= $Opt{e}; 12$Opt{h} and help(); 13 14my ($linebuf, $outbuf); 15my $CPL = $Opt{p} ? 64 : 8; 16my $linenum; 17my $linesperheading = $Opt{H}; 18my $nchars; 19our $PrevChunk; 20 21$Opt{h} and help(); 22$Opt{p} and do_perl($Opt{s}); 23do_dump($Opt{s}); 24exit; 25 26# 27 28sub do_perl{ 29 my $string = shift; 30 $Opt{P} and print "#!$^X -w\nprint\n"; 31 unless ($string){ 32 while(<>){ 33 use utf8; 34 $linebuf .= Encode::decode($Opt{f}, $_); 35 while($linebuf){ 36 my $chr = render_p(substr($linebuf, 0, 1, '')); 37 length($outbuf) + length($chr) > $CPL and print_P(); 38 $outbuf .= $chr; 39 } 40 } 41 $outbuf and print print_P(";"); 42 }else{ 43 while($string){ 44 my $chr = render_p(substr($string, 0, 1, '')); 45 length($outbuf) + length($chr) > $CPL and print_P(); 46 $outbuf .= $chr; 47 } 48 } 49 $outbuf and print print_P(";"); 50 exit; 51} 52 53sub render_p{ 54 my ($chr, $format) = @_; 55 our %S2pstr; 56 $S2pstr{$chr} and return $S2pstr{$chr}; # \t\n... 57 $chr =~ /[\x20-\x7e]/ and return $chr; # ascii, printable; 58 my $fmt = ($chr =~ /[\x00-\x1f\x7F]/) ? 59 q(\x%x) : q(\x{%x}); 60 return sprintf $fmt, ord($chr); 61} 62 63sub print_P{ 64 my $end = shift; 65 $outbuf or return; 66 print '"', encode($Opt{t}, $outbuf), '"'; 67 my $tail = $Opt{P} ? $end ? "$end" : "," : ''; 68 print $tail, "\n"; 69 $outbuf = ''; 70} 71 72sub do_dump{ 73 my $string = shift; 74 !$Opt{p} and exists $Opt{H} and print_H(); 75 unless ($string){ 76 while(<>){ 77 use utf8; 78 $linebuf .= Encode::decode($Opt{f}, $_); 79 while (length($linebuf) > $CPL){ 80 my $chunk = substr($linebuf, 0, $CPL, ''); 81 print_C($chunk, $linenum++); 82 $Opt{H} and $linenum % $Opt{H} == $CPL-1 and print_S(); 83 } 84 } 85 $linebuf and print_C($linebuf); 86 }else{ 87 while ($string){ 88 my $chunk = substr($string, 0, $CPL, ''); 89 print_C($chunk, $linenum++); 90 $Opt{H} and $linenum % $Opt{H} == $CPL-1 and print_S(); 91 } 92 } 93 exit; 94} 95 96sub print_S{ 97 print "--------+------------------------------------------------"; 98 if ($Opt{C}){ 99 print "-+-----------------"; 100 } 101 print "\n"; 102} 103sub print_H{ 104 print " Offset 0 1 2 3 4 5 6 7"; 105 if ($Opt{C}){ 106 print " | 0 1 2 3 4 5 6 7"; 107 } 108 print "\n"; 109 print_S; 110} 111 112sub print_C{ 113 my ($chunk, $linenum) = @_; 114 if (!$Opt{v} and $chunk eq $PrevChunk){ 115 printf "%08x *\n", $linenum*8; return; 116 } 117 $PrevChunk = $chunk; 118 my $end = length($chunk) - 1; 119 my (@ord, @chr); 120 for my $i (0..$end){ 121 use utf8; 122 my $chr = substr($chunk,$i,1); 123 my $ord = ord($chr); 124 my $fmt = $ord <= 0xffff ? " %04x" : " %05x"; 125 push @ord, (sprintf $fmt, $ord); 126 $Opt{C} and push @chr, render_c($chr); 127 } 128 if (++$end < 7){ 129 for my $i ($end..7){ 130 push @ord, (" " x 6); 131 } 132 } 133 my $line = sprintf "%08x %s", $linenum*8, join('', @ord); 134 $Opt{C} and $line .= sprintf " | %s", join('', @chr); 135 print encode($Opt{t}, $line), "\n"; 136} 137 138sub render_c{ 139 my ($chr, $format) = @_; 140 our (%S2str, $IsFullWidth); 141 $chr =~ /[\p{IsControl}\s]/o and return $S2str{$chr} || " "; 142 $chr =~ $IsFullWidth and return $chr; # as is 143 return " " . $chr; 144} 145 146sub help{ 147 my $message = shift; 148 use File::Basename; 149 my $name = basename($0); 150 $message and print STDERR "$name error: $message\n"; 151 print STDERR <<"EOT"; 152Usage: 153 $name -[options...] [files...] 154 $name -[options...] -s "string" 155 $name -h 156 -h prints this message. 157Inherited from hexdump; 158 -C Canonical unidump mode 159 -v prints the duplicate line as is. Without this option, 160 single "*" will be printed instead. 161For unidump only 162 -p prints in perl literals that you can copy and paste directly 163 to your perl script. 164 -P prints in perl executable format! 165 -u prints a bunch of "Uxxxx,". Handy when you want to pass your 166 characters in mailing lists. 167IO Options: 168 -e io_encoding same as "-f io_encoding -t io_encoding" 169 -f from_encoding convert the source stream from this encoding 170 -t to_encoding print to STDOUT in this encoding 171 -s string "string" will be converted instead of STDIN. 172 -H nline prints separater for each nlines of output. 173 0 means only the table headding be printed. 174EOT 175 exit; 176} 177 178BEGIN{ 179 our %S2pstr= ( 180 "\\" => '\\\\', 181 "\0" => '\0', 182 "\t" => '\t', 183 "\n" => '\n', 184 "\r" => '\r', 185 "\v" => '\v', 186 "\a" => '\a', 187 "\e" => '\e', 188 "\"" => qq(\\\"), 189 "\'" => qq(\\\'), 190 '$' => '\$', 191 "@" => '\@', 192 "%" => '\%', 193 ); 194 195 our %S2str = ( 196 qq(\x00) => q(\0), # NULL 197 qq(\x01) => q(^A), # START OF HEADING 198 qq(\x02) => q(^B), # START OF TEXT 199 qq(\x03) => q(^C), # END OF TEXT 200 qq(\x04) => q(^D), # END OF TRANSMISSION 201 qq(\x05) => q(^E), # ENQUIRY 202 qq(\x06) => q(^F), # ACKNOWLEDGE 203 qq(\x07) => q(\a), # BELL 204 qq(\x08) => q(^H), # BACKSPACE 205 qq(\x09) => q(\t), # HORIZONTAL TABULATION 206 qq(\x0A) => q(\n), # LINE FEED 207 qq(\x0B) => q(\v), # VERTICAL TABULATION 208 qq(\x0C) => q(^L), # FORM FEED 209 qq(\x0D) => q(\r), # CARRIAGE RETURN 210 qq(\x0E) => q(^N), # SHIFT OUT 211 qq(\x0F) => q(^O), # SHIFT IN 212 qq(\x10) => q(^P), # DATA LINK ESCAPE 213 qq(\x11) => q(^Q), # DEVICE CONTROL ONE 214 qq(\x12) => q(^R), # DEVICE CONTROL TWO 215 qq(\x13) => q(^S), # DEVICE CONTROL THREE 216 qq(\x14) => q(^T), # DEVICE CONTROL FOUR 217 qq(\x15) => q(^U), # NEGATIVE ACKNOWLEDGE 218 qq(\x16) => q(^V), # SYNCHRONOUS IDLE 219 qq(\x17) => q(^W), # END OF TRANSMISSION BLOCK 220 qq(\x18) => q(^X), # CANCEL 221 qq(\x19) => q(^Y), # END OF MEDIUM 222 qq(\x1A) => q(^Z), # SUBSTITUTE 223 qq(\x1B) => q(\e), # ESCAPE (\c[) 224 qq(\x1C) => "^\\", # FILE SEPARATOR 225 qq(\x1D) => "^\]", # GROUP SEPARATOR 226 qq(\x1E) => q(^^), # RECORD SEPARATOR 227 qq(\x1F) => q(^_), # UNIT SEPARATOR 228 ); 229 # 230 # Generated out of lib/unicore/EastAsianWidth.txt 231 # will it work ? 232 # 233 our $IsFullWidth = 234 qr/^[ 235 \x{1100}-\x{1159} 236 \x{115F}-\x{115F} 237 \x{2329}-\x{232A} 238 \x{2E80}-\x{2E99} 239 \x{2E9B}-\x{2EF3} 240 \x{2F00}-\x{2FD5} 241 \x{2FF0}-\x{2FFB} 242 \x{3000}-\x{303E} 243 \x{3041}-\x{3096} 244 \x{3099}-\x{30FF} 245 \x{3105}-\x{312C} 246 \x{3131}-\x{318E} 247 \x{3190}-\x{31B7} 248 \x{31F0}-\x{321C} 249 \x{3220}-\x{3243} 250 \x{3251}-\x{327B} 251 \x{327F}-\x{32CB} 252 \x{32D0}-\x{32FE} 253 \x{3300}-\x{3376} 254 \x{337B}-\x{33DD} 255 \x{3400}-\x{4DB5} 256 \x{4E00}-\x{9FA5} 257 \x{33E0}-\x{33FE} 258 \x{A000}-\x{A48C} 259 \x{AC00}-\x{D7A3} 260 \x{A490}-\x{A4C6} 261 \x{F900}-\x{FA2D} 262 \x{FA30}-\x{FA6A} 263 \x{FE30}-\x{FE46} 264 \x{FE49}-\x{FE52} 265 \x{FE54}-\x{FE66} 266 \x{FE68}-\x{FE6B} 267 \x{FF01}-\x{FF60} 268 \x{FFE0}-\x{FFE6} 269 \x{20000}-\x{2A6D6} 270 ]$/xo; 271} 272 273__END__ 274