1#!/usr/local/bin/perl 2 3# mismaps -- find 8-bit codepoints w/o Unicode mapping 4 5# Tom Christiansen <tchrist@perl.com> 6 7use v5.14; 8use utf8; 9use strict; 10use autodie; 11use warnings; 12use warnings "FATAL" => "utf8"; 13use open qw< :utf8 :std >; 14 15use charnames qw<:full :alias> => { 16 Apple_Mac => 0xF8FF, 17 unchanged => "LEFT RIGHT DOUBLE ARROW", 18}; 19 20use Unicode::Normalize; 21 22####################################################### 23 24sub ratsort; 25 26####################################################### 27 28our $SHOW_BADMAPS_ONLY = 0; 29our $SHOW_CHANGED_ONLY = 1; # if previous is 1, this is immaterial 30 31our $VERSION = v0.0.1311040647; # 19:57:27 MDT Mon Jul 18 2011 32 33$| = 1; 34 35my @ɪsᴏ = map { "iso-$_" } ratsort qw{ 36 8859-1 8859-4 8859-7 8859-10 8859-14 37 8859-2 8859-5 8859-8 8859-11 8859-15 38 8859-3 8859-6 8859-9 8859-13 8859-16 39}; 40 41my @μsoft = map { "cp$_"} ratsort qw{ 42 37 855 864 949 1253 43 424 856 865 950 1254 44 437 857 866 1006 1255 45 500 858 869 1026 1256 46 737 860 874 1047 1257 47 775 861 875 1250 1258 48 850 862 932 1251 49 852 863 936 1252 50}; 51 52my @apple = map { "Mac$_" } ratsort qw{ 53 Arabic Thai 54 CentralEurRoman Icelandic 55 Croatian Roman 56 Cyrillic Rumanian 57 Dingbats Sami 58 Farsi Symbol 59 Greek Turkish 60 Hebrew Ukrainian 61}; 62 63# kanji for "koi", of course :) 64my @鯉 = ratsort <koi8-{f,u,r}>; 65 66my $cmd = "byte2uni"; 67 68my @etc = ratsort qw( nextstep hp-roman8 dingbats viscii symbol posix-bc ); 69 70 71my @all_tests = (@μsoft, @ɪsᴏ, @apple, @鯉, @etc); 72 73my @tests = (); 74unless (@ARGV) { 75 @tests = @all_tests; 76} else { 77 state $testmap = { 78 79 all => \@all_tests, 80 everything => \@all_tests, 81 82 dos => \@μsoft, 83 microsoft => \@μsoft, 84 ms => \@μsoft, 85 windows => \@μsoft, 86 win => \@μsoft, 87 88 posix => \@ɪsᴏ, 89 iso => \@ɪsᴏ, 90 standard => \@ɪsᴏ, 91 std => \@ɪsᴏ, 92 93 apple => \@apple, 94 mac => \@apple, 95 macintosh => \@apple, 96 97 koi => \@鯉, 98 99 etc => \@etc, 100 ali => \@etc, 101 alia => \@etc, 102 alios => \@etc, 103 others => \@etc, 104 105 }; 106 107 my %seen; 108 109 for my $arg (map {lc} @ARGV) { 110 my $resolve = @{ $$testmap{$arg} || [lc $arg] }; 111 next if $seen{$resolve}++; 112 push @tests, $resolve; 113 } 114 115} 116 117 118for my $enc (@tests) { 119 say "\n$0: testing $enc"; 120 my @args =( $cmd, "--all", "--encoding=$enc" ); 121 open(my $b2u, "-| :utf8", @args) || die "can't open pipe: $!"; 122 local $_; 123 while (<$b2u>) { 124 next if $SHOW_CHANGED_ONLY && m< \N{unchanged} >x; 125 next if $SHOW_BADMAPS_ONLY &&! m< 126 Block= 127 | REPLACEMENT 128 | \Q \\N { U + \E 129 >x; 130 131 print; 132 } 133 134 eval { close($b2u) }; 135 exit if $? & 255; 136} 137 138sub ratsort { return 139 map { $_->[0] } 140 sort { $a->[1] cmp $b->[1] } 141 map { [ $_ => lc s/(\d+)/sprintf("%012s", $1)/reg ] } 142 @_ 143 ; 144} 145 146