1#
2# -*- Perl -*-
3# filter.pl - class for filter
4#
5# $Id: filter.pl.in,v 1.5.2.3 2009-02-17 08:53:35 opengl2772 Exp $
6#
7# Copyright (C) 2004 Yukio USUDA All rights reserved.
8# Copyright (C) 2000-2004 Namazu Project All rights reversed.
9#     This is free software with ABSOLUTELY NO WARRANTY.
10#
11#  This program is free software; you can redistribute it and/or modify
12#  it under the terms of the GNU General Public License as published by
13#  the Free Software Foundation; either versions 2, or (at your option)
14#  any later version.
15#
16#  This program is distributed in the hope that it will be useful
17#  but WITHOUT ANY WARRANTY; without even the implied warranty of
18#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19#  GNU General Public License for more details.
20#
21#  You should have received a copy of the GNU General Public License
22#  along with this program; if not, write to the Free Software
23#  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
24#  02111-1307, USA
25
26package mknmz::filter;
27
28use strict;
29use English;
30require 'util.pl';
31require 'codeconv.pl';
32require 'var.pl';
33
34my $MMagic = undef;
35
36my %REQUIRE_ACTIONS = ();
37my %RECURSIVE_ACTIONS = ();
38my %REQUIRE_PRE_CODECONV = ('text/plain' => 1,);
39my %REQUIRE_POST_CODECONV = ('text/plain' => 0,);
40my %Supported = ('text/plain' => "yes",);
41
42my $PKGDATADIR = $ENV{'pkgdatadir'} || "@pkgdatadir@";
43my $filterdir  = $PKGDATADIR . "/filter"; # directory where filters are in.
44
45
46sub new {
47    my $self = {};
48    my $proto = shift @_;
49    my $class = ref($proto) || $proto;
50    bless($self, $class);
51
52    return $self;
53}
54
55sub init {
56    my $self = shift @_;
57    $MMagic = shift @_;
58
59    if (defined $conf::FILTERDIR && -d $conf::FILTERDIR) {
60	$filterdir = $conf::FILTERDIR
61    };
62
63    if (($English::OSNAME eq "MSWin32") || ($English::OSNAME eq "os2")) {
64	if ($filterdir !~ /^[A-Z]:|^\\\\/i && $0 =~ m#^([A-Z]:)(/|\\)#i) {
65	    $filterdir = $1 . $filterdir ;
66	}
67        util::win32_yen_to_slash(\$filterdir);
68    }
69
70    #
71    # Windows modules must be loaded first.
72    # Because OLE filters have low precedence over normal ones.
73    #
74    _load_filtermodules($filterdir);
75}
76
77sub apply_filter {
78    my $self = shift @_;
79    my $orig_cfile = shift @_;
80    my $contentref = shift @_;
81    my $mtype = shift @_;
82    $self->{'_lang'} = shift @_;
83    my $fields_ref = shift @_;
84
85    $self->{'weighted_str'} = "";
86    $self->{'headings'} = "";
87
88    # Pre code conversion.
89    if ($REQUIRE_PRE_CODECONV{$mtype}) {
90	util::dprint("pre_codeconv\n");
91	_codeconv_document($contentref);
92    }
93
94    util::vprint("mtype = $mtype\n");
95
96    if (! $Supported{$mtype} ||
97	$Supported{$mtype} ne 'yes') {
98	util::vprint(_("Unsupported media type ")."$mtype\n");
99	return (0, "$mtype; x-system=unsupported");
100    }
101
102    if ($REQUIRE_ACTIONS{$mtype}) {
103	util::vprint(_("Using ")."$REQUIRE_ACTIONS{$mtype}.pl\n");
104	require $REQUIRE_ACTIONS{$mtype}.'.pl'
105	    || die _("unable to require ") .
106	     "\"$REQUIRE_ACTIONS{$mtype}.pl\"\n";
107	my $err = undef;
108
109	{
110	    local $SIG{'PIPE'} = \&trapintr;
111	    my $eval_txt = '$err = ' . $REQUIRE_ACTIONS{$mtype} .
112	      '::filter($orig_cfile, $contentref,' .
113	      ' \$self->{\'weighted_str\'}, ' .
114	      ' \$self->{\'headings\'}, $$fields_ref);';
115	    eval $eval_txt;
116	}
117	if ($err) {
118	    if ($err =~ m/; x-system=unsupported$/) {
119		return (0, $err);
120	    }
121	    return (0, "$mtype; x-error=$err");
122	}
123	if ($@) {
124	    util::vprint(_("Failed to call ")."$REQUIRE_ACTIONS{$mtype}\n$@\n");
125	    return (0, "$mtype; x-error=$@");
126	}
127
128	# Post code conversion.
129	if ($REQUIRE_POST_CODECONV{$mtype}) {
130	    util::dprint("post_codeconv\n");
131	    _codeconv_document($contentref);
132	}
133
134	if ($RECURSIVE_ACTIONS{$mtype}) {
135	    my $uri;
136	    my $Document = undef;
137	    $Document = mknmz::document->new();
138	    $Document->init_doc($uri, $orig_cfile, $contentref, undef);
139	    $contentref= $Document->get_filtered_contentref();
140	    $self->{'weighted_str'} .= $Document->get_weighted_str();
141	    $self->{'headings'} .= $Document->get_headings();
142	}
143    }
144    return ($self->{'weighted_str'}, $self->{'headings'});
145}
146
147
148sub get_info {
149    my $self = shift @_;
150    my $name = shift @_;
151    if ($name eq 'filter_dir') {
152        return $filterdir;
153    } elsif ($name eq 'all_types') {
154        return $self->_get_all_types;
155    } elsif ($name eq 'supported_types') {
156        return $self->_get_supported_list;
157    } elsif ($name eq 'supported_list') {
158        return $self->_get_supported_list;
159    }
160}
161
162#######################################################
163sub _get_all_types {
164    my $self = shift @_;
165    my @all_types = keys %Supported;
166    return @all_types;
167}
168
169sub _get_supported_types {
170    my $self = shift @_;
171    my @supported = sort grep { $Supported{$_} eq "yes" } $self->_get_all_types;
172    return @supported;
173}
174
175sub _get_supported_list {
176    my $self = shift @_;
177    my $num_supported = $self->_get_supported_types();
178    my $num_unsupported = $self->_get_all_types() - $num_supported;
179    my $list = _("Supported media types:   ") . "($num_supported)\n";
180    $list .= _("Unsupported media types: ") . "($num_unsupported) " . _("marked with minus (-) probably missing application in your \$path.\n");
181    my $supported_list = "";
182    for my $mtype (sort keys %Supported) {
183	my $yn = $Supported{$mtype};
184	if ($yn eq 'yes') { $yn = ' ' } else {$yn = '-'};
185	$supported_list .= "$yn $mtype";
186	if ($REQUIRE_ACTIONS{$mtype}) {
187	    $supported_list .= ": $REQUIRE_ACTIONS{$mtype}.pl";
188	}
189	$supported_list .= "\n";
190    }
191    $list .= $supported_list;
192    return $list;
193}
194
195sub _codeconv_document ($) {
196    my ($textref) = @_;
197    #codeconv::to_inner_encoding($textref, 'unknown');
198    codeconv::toeuc($textref);
199    $$textref =~ s/\r\n/\n/g;
200    $$textref =~ s/\r/\n/g;
201    $$textref =~ tr/\x01-\x08\x0b-\x0c\x0e-\x1f\x7f/ /; # Remove control char.
202}
203
204sub _load_filtermodules ($) {
205    my $filterdir = shift @_;
206    unshift @INC, $filterdir;
207
208    # Check filter modules
209    my @filters = ();
210    @filters = glob "$filterdir/*.pl";
211
212    #
213    # Windows modules must be check only when Active Perl environment.
214    # Because OLE filters use Win32::OLE module.
215    #
216    if ($English::OSNAME eq "MSWin32") {
217	@filters = (@filters, glob "$filterdir/win32/*.pl");
218	unshift @INC, "$filterdir/win32";
219    }
220
221    _load_filters(@filters);
222}
223
224sub _load_filters ($) {
225    my @filters = @_;
226
227    for my $filter (@filters) {
228	$filter =~ m!([-\w]+)\.pl$!;
229	my $module = $1;
230	require "$module.pl" || die "unable to require \"$module.pl\"\n";;
231	my (@mtypes, $status, $recursive, $pre_codeconv, $post_codeconv);
232
233	eval "\@mtypes =    ${module}::mediatype();";
234	die $@ if $@;  # eval error
235	eval "\$status =    ${module}::status();";
236	die $@ if $@;
237	eval "\$recursive = ${module}::recursive();";
238	die $@ if $@;
239	eval "\$pre_codeconv  = ${module}::pre_codeconv();";
240	die $@ if $@;
241	eval "\$post_codeconv  = ${module}::post_codeconv();";
242	die $@ if $@;
243
244	my $tmp = ref \$MMagic; # I don't know why this line is needed.
245	eval "${module}::add_magic(\$MMagic);";
246	die $@ if $@;
247
248	for my $mt (@mtypes) {
249        next if (defined $Supported{$mt} &&
250                 $Supported{$mt} eq 'yes' && $status eq 'no');
251	    $Supported{$mt} = $status;
252	    $REQUIRE_ACTIONS{$mt} = $module;
253	    $RECURSIVE_ACTIONS{$mt} = $recursive;
254	    $REQUIRE_PRE_CODECONV{$mt} = $pre_codeconv;
255	    $REQUIRE_POST_CODECONV{$mt} = $post_codeconv;
256	}
257    }
258}
259
2601;
261