1#!./perl 2 3use strict; 4use Encode; 5use Getopt::Std; 6my %Opt; getopts("ChH:e:f:t:s:pPv", \%Opt); 7$Opt{p} ||= $Opt{P}; 8$Opt{e} ||= 'utf8'; 9$Opt{f} ||= $Opt{e}; 10$Opt{t} ||= $Opt{e}; 11$Opt{h} and help(); 12 13my ($linebuf, $outbuf); 14my $CPL = $Opt{p} ? 64 : 8; 15my $linenum; 16my $linesperheading = $Opt{H}; 17my $nchars; 18our $PrevChunk; 19 20$Opt{h} and help(); 21$Opt{p} and do_perl($Opt{s}); 22do_dump($Opt{s}); 23exit; 24 25# 26 27sub do_perl{ 28 my $string = shift; 29 $Opt{P} and print "#!$^X -w\nprint\n"; 30 unless ($string){ 31 while(<>){ 32 use utf8; 33 $linebuf .= Encode::decode($Opt{f}, $_); 34 while($linebuf){ 35 my $chr = render_p(substr($linebuf, 0, 1, '')); 36 length($outbuf) + length($chr) > $CPL and print_P(); 37 $outbuf .= $chr; 38 } 39 } 40 $outbuf and print print_P(";"); 41 }else{ 42 while($string){ 43 my $chr = render_p(substr($string, 0, 1, '')); 44 length($outbuf) + length($chr) > $CPL and print_P(); 45 $outbuf .= $chr; 46 } 47 } 48 $outbuf and print print_P(";"); 49 exit; 50} 51 52sub render_p{ 53 my ($chr, $format) = @_; 54 our %S2pstr; 55 $S2pstr{$chr} and return $S2pstr{$chr}; # \t\n... 56 $chr =~ /[\x20-\x7e]/ and return $chr; # ascii, printable; 57 my $fmt = ($chr =~ /[\x00-\x1f\x7F]/) ? 58 q(\x%x) : q(\x{%x}); 59 return sprintf $fmt, ord($chr); 60} 61 62sub print_P{ 63 my $end = shift; 64 $outbuf or return; 65 print '"', encode($Opt{t}, $outbuf), '"'; 66 my $tail = $Opt{P} ? $end ? "$end" : "," : ''; 67 print $tail, "\n"; 68 $outbuf = ''; 69} 70 71sub do_dump{ 72 my $string = shift; 73 !$Opt{p} and exists $Opt{H} and print_H(); 74 unless ($string){ 75 while(<>){ 76 use utf8; 77 $linebuf .= Encode::decode($Opt{f}, $_); 78 while (length($linebuf) > $CPL){ 79 my $chunk = substr($linebuf, 0, $CPL, ''); 80 print_C($chunk, $linenum++); 81 $Opt{H} and $linenum % $Opt{H} == $CPL-1 and print_S(); 82 } 83 } 84 $linebuf and print_C($linebuf); 85 }else{ 86 while ($string){ 87 my $chunk = substr($string, 0, $CPL, ''); 88 print_C($chunk, $linenum++); 89 $Opt{H} and $linenum % $Opt{H} == $CPL-1 and print_S(); 90 } 91 } 92 exit; 93} 94 95sub print_S{ 96 print "--------+------------------------------------------------"; 97 if ($Opt{C}){ 98 print "-+-----------------"; 99 } 100 print "\n"; 101} 102sub print_H{ 103 print " Offset 0 1 2 3 4 5 6 7"; 104 if ($Opt{C}){ 105 print " | 0 1 2 3 4 5 6 7"; 106 } 107 print "\n"; 108 print_S; 109} 110 111sub print_C{ 112 my ($chunk, $linenum) = @_; 113 if (!$Opt{v} and $chunk eq $PrevChunk){ 114 printf "%08x *\n", $linenum*8; return; 115 } 116 $PrevChunk = $chunk; 117 my $end = length($chunk) - 1; 118 my (@ord, @chr); 119 for my $i (0..$end){ 120 use utf8; 121 my $chr = substr($chunk,$i,1); 122 my $ord = ord($chr); 123 my $fmt = $ord <= 0xffff ? " %04x" : " %05x"; 124 push @ord, (sprintf $fmt, $ord); 125 $Opt{C} and push @chr, render_c($chr); 126 } 127 if (++$end < 7){ 128 for my $i ($end..7){ 129 push @ord, (" " x 6); 130 } 131 } 132 my $line = sprintf "%08x %s", $linenum*8, join('', @ord); 133 $Opt{C} and $line .= sprintf " | %s", join('', @chr); 134 print encode($Opt{t}, $line), "\n"; 135} 136 137sub render_c{ 138 my ($chr, $format) = @_; 139 our (%S2str, $IsFullWidth); 140 $chr =~ /[\p{IsControl}\s]/o and return $S2str{$chr} || " "; 141 $chr =~ $IsFullWidth and return $chr; # as is 142 return " " . $chr; 143} 144 145sub help{ 146 my $message = shift; 147 use File::Basename; 148 my $name = basename($0); 149 $message and print STDERR "$name error: $message\n"; 150 print STDERR <<"EOT"; 151Usage: 152 $name -[options...] [files...] 153 $name -[options...] -s "string" 154 $name -h 155 -h prints this message. 156Inherited from hexdump; 157 -C Canonical unidump mode 158 -v prints the duplicate line as is. Without this option, 159 single "*" will be printed instead. 160For unidump only 161 -p prints in perl literals that you can copy and paste directly 162 to your perl script. 163 -P prints in perl executable format! 164 -u prints a bunch of "Uxxxx,". Handy when you want to pass your 165 characters in mailing lists. 166IO Options: 167 -e io_encoding same as "-f io_encoding -t io_encoding" 168 -f from_encoding convert the source stream from this encoding 169 -t to_encoding print to STDOUT in this encoding 170 -s string "string" will be converted instead of STDIN. 171 -H nline prints separater for each nlines of output. 172 0 means only the table headding be printed. 173EOT 174 exit; 175} 176 177BEGIN{ 178 our %S2pstr= ( 179 "\\" => '\\\\', 180 "\0" => '\0', 181 "\t" => '\t', 182 "\n" => '\n', 183 "\r" => '\r', 184 "\v" => '\v', 185 "\a" => '\a', 186 "\e" => '\e', 187 "\"" => qq(\\\"), 188 "\'" => qq(\\\'), 189 '$' => '\$', 190 "@" => '\@', 191 "%" => '\%', 192 ); 193 194 our %S2str = ( 195 qq(\x00) => q(\0), # NULL 196 qq(\x01) => q(^A), # START OF HEADING 197 qq(\x02) => q(^B), # START OF TEXT 198 qq(\x03) => q(^C), # END OF TEXT 199 qq(\x04) => q(^D), # END OF TRANSMISSION 200 qq(\x05) => q(^E), # ENQUIRY 201 qq(\x06) => q(^F), # ACKNOWLEDGE 202 qq(\x07) => q(\a), # BELL 203 qq(\x08) => q(^H), # BACKSPACE 204 qq(\x09) => q(\t), # HORIZONTAL TABULATION 205 qq(\x0A) => q(\n), # LINE FEED 206 qq(\x0B) => q(\v), # VERTICAL TABULATION 207 qq(\x0C) => q(^L), # FORM FEED 208 qq(\x0D) => q(\r), # CARRIAGE RETURN 209 qq(\x0E) => q(^N), # SHIFT OUT 210 qq(\x0F) => q(^O), # SHIFT IN 211 qq(\x10) => q(^P), # DATA LINK ESCAPE 212 qq(\x11) => q(^Q), # DEVICE CONTROL ONE 213 qq(\x12) => q(^R), # DEVICE CONTROL TWO 214 qq(\x13) => q(^S), # DEVICE CONTROL THREE 215 qq(\x14) => q(^T), # DEVICE CONTROL FOUR 216 qq(\x15) => q(^U), # NEGATIVE ACKNOWLEDGE 217 qq(\x16) => q(^V), # SYNCHRONOUS IDLE 218 qq(\x17) => q(^W), # END OF TRANSMISSION BLOCK 219 qq(\x18) => q(^X), # CANCEL 220 qq(\x19) => q(^Y), # END OF MEDIUM 221 qq(\x1A) => q(^Z), # SUBSTITUTE 222 qq(\x1B) => q(\e), # ESCAPE (\c[) 223 qq(\x1C) => "^\\", # FILE SEPARATOR 224 qq(\x1D) => "^\]", # GROUP SEPARATOR 225 qq(\x1E) => q(^^), # RECORD SEPARATOR 226 qq(\x1F) => q(^_), # UNIT SEPARATOR 227 ); 228 # 229 # Generated out of lib/unicore/EastAsianWidth.txt 230 # will it work ? 231 # 232 our $IsFullWidth = 233 qr/^[ 234 \x{1100}-\x{1159} 235 \x{115F}-\x{115F} 236 \x{2329}-\x{232A} 237 \x{2E80}-\x{2E99} 238 \x{2E9B}-\x{2EF3} 239 \x{2F00}-\x{2FD5} 240 \x{2FF0}-\x{2FFB} 241 \x{3000}-\x{303E} 242 \x{3041}-\x{3096} 243 \x{3099}-\x{30FF} 244 \x{3105}-\x{312C} 245 \x{3131}-\x{318E} 246 \x{3190}-\x{31B7} 247 \x{31F0}-\x{321C} 248 \x{3220}-\x{3243} 249 \x{3251}-\x{327B} 250 \x{327F}-\x{32CB} 251 \x{32D0}-\x{32FE} 252 \x{3300}-\x{3376} 253 \x{337B}-\x{33DD} 254 \x{3400}-\x{4DB5} 255 \x{4E00}-\x{9FA5} 256 \x{33E0}-\x{33FE} 257 \x{A000}-\x{A48C} 258 \x{AC00}-\x{D7A3} 259 \x{A490}-\x{A4C6} 260 \x{F900}-\x{FA2D} 261 \x{FA30}-\x{FA6A} 262 \x{FE30}-\x{FE46} 263 \x{FE49}-\x{FE52} 264 \x{FE54}-\x{FE66} 265 \x{FE68}-\x{FE6B} 266 \x{FF01}-\x{FF60} 267 \x{FFE0}-\x{FFE6} 268 \x{20000}-\x{2A6D6} 269 ]$/xo; 270} 271 272__END__ 273