1package Convert::Moji;
2
3use warnings;
4use strict;
5
6require Exporter;
7our @ISA = qw(Exporter);
8our @EXPORT_OK = qw/make_regex length_one unambiguous/;
9
10use Carp;
11
12our $VERSION = '0.10';
13
14# Load a converter from a file and return a hash reference containing
15# the left/right pairs.
16
17sub load_convertor
18{
19    my ($file) = @_;
20    my $file_in;
21    if (! open $file_in, "<:encoding(utf8)", $file) {
22	carp "Could not open '$file' for reading: $!";
23	return;
24    }
25    my %converter;
26    while (my $line = <$file_in>) {
27	chomp $line;
28	my ($left, $right) = split /\s+/, $line;
29	$converter{$left} = $right;
30    }
31    close $file_in or croak "Could not close '$file': $!";
32    return \%converter;
33}
34
35sub length_one
36{
37    for (@_) {
38	return if !/^.$/;
39    }
40    return 1;
41}
42
43sub make_regex
44{
45    my @inputs = @_;
46    # Quote any special characters. We could also do this with join
47    # '\E|\Q', but the regexes then become even longer.
48    @inputs = map {quotemeta} @inputs;
49    if (length_one (@inputs)) {
50	return '(['.(join '', @inputs).'])';
51    }
52    else {
53	# Sorting is essential, otherwise shorter characters match before
54	# longer ones, causing errors if the shorter character is part of
55	# a longer one.
56	return '('.join ('|',sort { length($b) <=> length($a) } @inputs).')';
57    }
58}
59
60sub unambiguous
61{
62    my ($table) = @_;
63    my %inverted;
64    for (keys %$table) {
65	my $v = $$table{$_};
66	return if $inverted{$v};
67	$inverted{$v} = $_;
68    }
69    # Is not ambiguous
70    return 1;
71}
72
73# If the table is unambiguous, we can use Perl's built-in "reverse"
74# function. However, if the table is ambiguous, "reverse" will lose
75# information. The method applied here is to make a hash with the
76# values of $table as keys and the values are array references.
77
78sub ambiguous_reverse
79{
80    my ($table) = @_;
81    my %inverted;
82    for (keys %$table) {
83	my $val = $table->{$_};
84	push @{$inverted{$val}}, $_;
85    }
86    for (keys %inverted) {
87	@{$inverted{$_}} = sort @{$inverted{$_}};
88    }
89    return \%inverted;
90}
91
92# Callback
93
94sub split_match
95{
96    my ($erter, $input, $convert_type) = @_;
97    if (! $convert_type) {
98	$convert_type = 'first';
99    }
100    my $lhs = $erter->{rhs};
101    my $rhs = $erter->{out2in};
102    if (!$convert_type || $convert_type eq 'first') {
103	$input =~ s/$lhs/$$rhs{$1}->[0]/eg;
104	return $input;
105    }
106    elsif ($convert_type eq 'random') {
107	my $size = @$rhs;
108	$input =~ s/$lhs/$$rhs{$1}->[int rand $size]/eg;
109	return $input;
110    }
111    elsif ($convert_type eq 'all' || $convert_type eq 'all_joined') {
112	my @output = grep {length($_) > 0} (split /$lhs/, $input);
113	for my $o (@output) {
114	    if ($o =~ /$lhs/) {
115		$o = $$rhs{$1};
116	    }
117	}
118	if ($convert_type eq 'all') {
119	    return \@output;
120	}
121        else {
122	    return join ('',map {ref($_) eq 'ARRAY' ? "[@$_]" : $_} @output);
123	}
124    }
125    else {
126	carp "Unknown convert_type $convert_type";
127    }
128}
129
130# Attach a table to a Convert::Moji object.
131
132sub table
133{
134    my ($table, $noinvert) = @_;
135    my $erter = {};
136    $erter->{type} = "table";
137    $erter->{in2out} = $table;
138    my @keys = keys %$table;
139    my @values = values %$table;
140    $erter->{lhs} = make_regex @keys;
141    if (!$noinvert) {
142	$erter->{unambiguous} = unambiguous($table);
143	if ($erter->{unambiguous}) {
144	    my %out2in_table = reverse %{$table};
145	    $erter->{out2in} = \%out2in_table;
146	}
147	else {
148	    $erter->{out2in} = ambiguous_reverse ($table);
149	    @values = keys %{$erter->{out2in}};
150	}
151	$erter->{rhs} = make_regex @values;
152    }
153    return $erter;
154}
155
156# Make a converter from a tr instruction.
157
158sub tr_erter
159{
160    my ($lhs, $rhs) = @_;
161    my $erter = {};
162    $erter->{type} = "tr";
163    $erter->{lhs} = $lhs;
164    $erter->{rhs} = $rhs;
165    return $erter;
166}
167
168# Add a code-based converter
169
170sub code
171{
172    my ($convert, $invert) = @_;
173    my $erter = {};
174    $erter->{type} = "code";
175    $erter->{convert} = $convert;
176    $erter->{invert} = $invert;
177    return $erter;
178}
179
180sub new
181{
182    my ($package, @conversions) = @_;
183    my $conv = {};
184    bless $conv;
185    $conv->{erter} = [];
186    $conv->{erters} = 0;
187    for my $c (@conversions) {
188	my $noinvert;
189	my $erter;
190	if ($c->[0] eq "oneway") {
191	    shift @$c;
192	    $noinvert = 1;
193	}
194	if ($c->[0] eq "table") {
195	    $erter = table ($c->[1], $noinvert);
196	}
197	elsif ($c->[0] eq "file") {
198	    my $file = $c->[1];
199	    my $table = Convert::Moji::load_convertor ($file);
200	    return if !$table;
201	    $erter = table ($table, $noinvert);
202	}
203	elsif ($c->[0] eq 'tr') {
204	    $erter = tr_erter ($c->[1], $c->[2]);
205	}
206	elsif ($c->[0] eq 'code') {
207	    $erter = code ($c->[1], $c->[2]);
208	    if (!$c->[2]) {
209		$noinvert = 1;
210	    }
211	}
212	my $o = $conv->{erters};
213	$conv->{erter}->[$o] = $erter;
214	$conv->{noinvert}->[$o] = $noinvert;
215	$conv->{erters}++;
216    }
217    return $conv;
218}
219
220sub convert
221{
222    my ($conv, $input) = @_;
223    for (my $i = 0; $i < $conv->{erters}; $i++) {
224	my $erter = $conv->{erter}->[$i];
225	if ($erter->{type} eq "table") {
226	    my $lhs = $erter->{lhs};
227	    my $rhs = $erter->{in2out};
228	    $input =~ s/$lhs/$$rhs{$1}/g;
229	}
230        elsif ($erter->{type} eq 'tr') {
231	    my $lhs = $erter->{lhs};
232	    my $rhs = $erter->{rhs};
233	    eval ("\$input =~ tr/$lhs/$rhs/");
234	}
235        elsif ($erter->{type} eq 'code') {
236	    $_ = $input;
237	    $input = &{$erter->{convert}};
238	}
239    }
240    return $input;
241}
242
243sub invert
244{
245    my ($conv, $input, $convert_type) = @_;
246    for (my $i = $conv->{erters} - 1; $i >= 0; $i--) {
247	next if $conv->{noinvert}->[$i];
248	my $erter = $conv->{erter}->[$i];
249	if ($erter->{type} eq "table") {
250	    if ($erter->{unambiguous}) {
251		my $lhs = $erter->{rhs};
252		my $rhs = $erter->{out2in};
253		$input =~ s/$lhs/$$rhs{$1}/g;
254	    }
255            else {
256		$input = split_match ($erter, $input, $convert_type);
257	    }
258	}
259        elsif ($erter->{type} eq 'tr') {
260	    my $lhs = $erter->{rhs};
261	    my $rhs = $erter->{lhs};
262	    eval ("\$input =~ tr/$lhs/$rhs/");
263	}
264        elsif ($erter->{type} eq 'code') {
265	    $_ = $input;
266	    $input = &{$erter->{invert}};
267	}
268    }
269    return $input;
270}
271
2721;
273
274
275
276