1#!/usr/local/bin/perl
2
3# mismaps -- find 8-bit codepoints w/o Unicode mapping
4
5# Tom Christiansen <tchrist@perl.com>
6
7use v5.14;
8use utf8;
9use strict;
10use autodie;
11use warnings;
12use warnings "FATAL" => "utf8";
13use open qw< :utf8 :std >;
14
15use charnames qw<:full :alias> => {
16    Apple_Mac   => 0xF8FF,
17    unchanged   => "LEFT RIGHT DOUBLE ARROW",
18};
19
20use Unicode::Normalize;
21
22#######################################################
23
24sub ratsort;
25
26#######################################################
27
28our $SHOW_BADMAPS_ONLY  = 0;
29our $SHOW_CHANGED_ONLY  = 1;   # if previous is 1, this is immaterial
30
31our $VERSION = v0.0.1311040647; # 19:57:27 MDT Mon Jul 18 2011
32
33$| = 1;
34
35mysᴏ = map { "iso-$_" } ratsort qw{
36    8859-1   8859-4   8859-7   8859-10  8859-14
37    8859-2   8859-5   8859-8   8859-11  8859-15
38    8859-3   8859-6   8859-9   8859-13  8859-16
39};
40
41mysoft = map { "cp$_"} ratsort qw{
42     37   855   864    949  1253
43    424   856   865    950  1254
44    437   857   866   1006  1255
45    500   858   869   1026  1256
46    737   860   874   1047  1257
47    775   861   875   1250  1258
48    850   862   932   1251
49    852   863   936   1252
50};
51
52my @apple = map { "Mac$_" } ratsort qw{
53    Arabic             Thai
54    CentralEurRoman    Icelandic
55    Croatian           Roman
56    Cyrillic           Rumanian
57    Dingbats           Sami
58    Farsi              Symbol
59    Greek              Turkish
60    Hebrew             Ukrainian
61};
62
63# kanji for "koi", of course :)
64my @鯉 = ratsort <koi8-{f,u,r}>;
65
66my $cmd = "byte2uni";
67
68my @etc = ratsort qw( nextstep hp-roman8 dingbats viscii symbol posix-bc );
69
70
71my @all_tests =  (@μsoft, @ɪsᴏ, @apple, @鯉, @etc);
72
73my @tests = ();
74unless (@ARGV) {
75    @tests =  @all_tests;
76} else {
77    state $testmap = {
78
79        all         => \@all_tests,
80        everything  => \@all_tests,
81
82	dos         => \@μsoft,
83	microsoft   => \@μsoft,
84	ms          => \@μsoft,
85	windows     => \@μsoft,
86	win         => \@μsoft,
87
88        posix       => \@ɪsᴏ,
89        iso         => \@ɪsᴏ,
90        standard    => \@ɪsᴏ,
91        std         => \@ɪsᴏ,
92
93        apple       => \@apple,
94        mac         => \@apple,
95        macintosh   => \@apple,
96
97        koi         => \@鯉,
98
99        etc         => \@etc,
100        ali         => \@etc,
101        alia        => \@etc,
102        alios       => \@etc,
103        others      => \@etc,
104
105    };
106
107    my %seen;
108
109    for my $arg (map {lc} @ARGV) {
110        my $resolve = @{ $$testmap{$arg} || [lc $arg] };
111        next if $seen{$resolve}++;
112        push @tests, $resolve;
113    }
114
115}
116
117
118for my $enc (@tests) {
119    say "\n$0: testing $enc";
120    my @args =( $cmd, "--all", "--encoding=$enc" );
121    open(my $b2u, "-| :utf8", @args) || die "can't open pipe: $!";
122    local $_;
123    while (<$b2u>) {
124        next if $SHOW_CHANGED_ONLY &&  m< \N{unchanged} >x;
125        next if $SHOW_BADMAPS_ONLY &&! m<
126               Block=
127            |  REPLACEMENT
128            |  \Q \\N { U + \E
129        >x;
130
131        print;
132    }
133
134    eval { close($b2u) };
135    exit if $? & 255;
136}
137
138sub ratsort { return
139    map  { $_->[0]                                       }
140    sort { $a->[1] cmp $b->[1]                           }
141    map  { [ $_ => lc s/(\d+)/sprintf("%012s", $1)/reg ] }
142    @_
143    ;
144}
145
146