1package Convert::Moji; 2 3use warnings; 4use strict; 5 6require Exporter; 7our @ISA = qw(Exporter); 8our @EXPORT_OK = qw/make_regex length_one unambiguous/; 9 10use Carp; 11 12our $VERSION = '0.10'; 13 14# Load a converter from a file and return a hash reference containing 15# the left/right pairs. 16 17sub load_convertor 18{ 19 my ($file) = @_; 20 my $file_in; 21 if (! open $file_in, "<:encoding(utf8)", $file) { 22 carp "Could not open '$file' for reading: $!"; 23 return; 24 } 25 my %converter; 26 while (my $line = <$file_in>) { 27 chomp $line; 28 my ($left, $right) = split /\s+/, $line; 29 $converter{$left} = $right; 30 } 31 close $file_in or croak "Could not close '$file': $!"; 32 return \%converter; 33} 34 35sub length_one 36{ 37 for (@_) { 38 return if !/^.$/; 39 } 40 return 1; 41} 42 43sub make_regex 44{ 45 my @inputs = @_; 46 # Quote any special characters. We could also do this with join 47 # '\E|\Q', but the regexes then become even longer. 48 @inputs = map {quotemeta} @inputs; 49 if (length_one (@inputs)) { 50 return '(['.(join '', @inputs).'])'; 51 } 52 else { 53 # Sorting is essential, otherwise shorter characters match before 54 # longer ones, causing errors if the shorter character is part of 55 # a longer one. 56 return '('.join ('|',sort { length($b) <=> length($a) } @inputs).')'; 57 } 58} 59 60sub unambiguous 61{ 62 my ($table) = @_; 63 my %inverted; 64 for (keys %$table) { 65 my $v = $$table{$_}; 66 return if $inverted{$v}; 67 $inverted{$v} = $_; 68 } 69 # Is not ambiguous 70 return 1; 71} 72 73# If the table is unambiguous, we can use Perl's built-in "reverse" 74# function. However, if the table is ambiguous, "reverse" will lose 75# information. The method applied here is to make a hash with the 76# values of $table as keys and the values are array references. 77 78sub ambiguous_reverse 79{ 80 my ($table) = @_; 81 my %inverted; 82 for (keys %$table) { 83 my $val = $table->{$_}; 84 push @{$inverted{$val}}, $_; 85 } 86 for (keys %inverted) { 87 @{$inverted{$_}} = sort @{$inverted{$_}}; 88 } 89 return \%inverted; 90} 91 92# Callback 93 94sub split_match 95{ 96 my ($erter, $input, $convert_type) = @_; 97 if (! $convert_type) { 98 $convert_type = 'first'; 99 } 100 my $lhs = $erter->{rhs}; 101 my $rhs = $erter->{out2in}; 102 if (!$convert_type || $convert_type eq 'first') { 103 $input =~ s/$lhs/$$rhs{$1}->[0]/eg; 104 return $input; 105 } 106 elsif ($convert_type eq 'random') { 107 my $size = @$rhs; 108 $input =~ s/$lhs/$$rhs{$1}->[int rand $size]/eg; 109 return $input; 110 } 111 elsif ($convert_type eq 'all' || $convert_type eq 'all_joined') { 112 my @output = grep {length($_) > 0} (split /$lhs/, $input); 113 for my $o (@output) { 114 if ($o =~ /$lhs/) { 115 $o = $$rhs{$1}; 116 } 117 } 118 if ($convert_type eq 'all') { 119 return \@output; 120 } 121 else { 122 return join ('',map {ref($_) eq 'ARRAY' ? "[@$_]" : $_} @output); 123 } 124 } 125 else { 126 carp "Unknown convert_type $convert_type"; 127 } 128} 129 130# Attach a table to a Convert::Moji object. 131 132sub table 133{ 134 my ($table, $noinvert) = @_; 135 my $erter = {}; 136 $erter->{type} = "table"; 137 $erter->{in2out} = $table; 138 my @keys = keys %$table; 139 my @values = values %$table; 140 $erter->{lhs} = make_regex @keys; 141 if (!$noinvert) { 142 $erter->{unambiguous} = unambiguous($table); 143 if ($erter->{unambiguous}) { 144 my %out2in_table = reverse %{$table}; 145 $erter->{out2in} = \%out2in_table; 146 } 147 else { 148 $erter->{out2in} = ambiguous_reverse ($table); 149 @values = keys %{$erter->{out2in}}; 150 } 151 $erter->{rhs} = make_regex @values; 152 } 153 return $erter; 154} 155 156# Make a converter from a tr instruction. 157 158sub tr_erter 159{ 160 my ($lhs, $rhs) = @_; 161 my $erter = {}; 162 $erter->{type} = "tr"; 163 $erter->{lhs} = $lhs; 164 $erter->{rhs} = $rhs; 165 return $erter; 166} 167 168# Add a code-based converter 169 170sub code 171{ 172 my ($convert, $invert) = @_; 173 my $erter = {}; 174 $erter->{type} = "code"; 175 $erter->{convert} = $convert; 176 $erter->{invert} = $invert; 177 return $erter; 178} 179 180sub new 181{ 182 my ($package, @conversions) = @_; 183 my $conv = {}; 184 bless $conv; 185 $conv->{erter} = []; 186 $conv->{erters} = 0; 187 for my $c (@conversions) { 188 my $noinvert; 189 my $erter; 190 if ($c->[0] eq "oneway") { 191 shift @$c; 192 $noinvert = 1; 193 } 194 if ($c->[0] eq "table") { 195 $erter = table ($c->[1], $noinvert); 196 } 197 elsif ($c->[0] eq "file") { 198 my $file = $c->[1]; 199 my $table = Convert::Moji::load_convertor ($file); 200 return if !$table; 201 $erter = table ($table, $noinvert); 202 } 203 elsif ($c->[0] eq 'tr') { 204 $erter = tr_erter ($c->[1], $c->[2]); 205 } 206 elsif ($c->[0] eq 'code') { 207 $erter = code ($c->[1], $c->[2]); 208 if (!$c->[2]) { 209 $noinvert = 1; 210 } 211 } 212 my $o = $conv->{erters}; 213 $conv->{erter}->[$o] = $erter; 214 $conv->{noinvert}->[$o] = $noinvert; 215 $conv->{erters}++; 216 } 217 return $conv; 218} 219 220sub convert 221{ 222 my ($conv, $input) = @_; 223 for (my $i = 0; $i < $conv->{erters}; $i++) { 224 my $erter = $conv->{erter}->[$i]; 225 if ($erter->{type} eq "table") { 226 my $lhs = $erter->{lhs}; 227 my $rhs = $erter->{in2out}; 228 $input =~ s/$lhs/$$rhs{$1}/g; 229 } 230 elsif ($erter->{type} eq 'tr') { 231 my $lhs = $erter->{lhs}; 232 my $rhs = $erter->{rhs}; 233 eval ("\$input =~ tr/$lhs/$rhs/"); 234 } 235 elsif ($erter->{type} eq 'code') { 236 $_ = $input; 237 $input = &{$erter->{convert}}; 238 } 239 } 240 return $input; 241} 242 243sub invert 244{ 245 my ($conv, $input, $convert_type) = @_; 246 for (my $i = $conv->{erters} - 1; $i >= 0; $i--) { 247 next if $conv->{noinvert}->[$i]; 248 my $erter = $conv->{erter}->[$i]; 249 if ($erter->{type} eq "table") { 250 if ($erter->{unambiguous}) { 251 my $lhs = $erter->{rhs}; 252 my $rhs = $erter->{out2in}; 253 $input =~ s/$lhs/$$rhs{$1}/g; 254 } 255 else { 256 $input = split_match ($erter, $input, $convert_type); 257 } 258 } 259 elsif ($erter->{type} eq 'tr') { 260 my $lhs = $erter->{rhs}; 261 my $rhs = $erter->{lhs}; 262 eval ("\$input =~ tr/$lhs/$rhs/"); 263 } 264 elsif ($erter->{type} eq 'code') { 265 $_ = $input; 266 $input = &{$erter->{invert}}; 267 } 268 } 269 return $input; 270} 271 2721; 273 274 275 276