1#
2# -*- Perl -*-
3# $Id: xps.pl,v 1.1.2.1 2007-02-23 05:02:43 opengl2772 Exp $
4# Copyright (C) 2007 Yukio USUDA,
5#               2007 Namazu Project All rights reserved.
6#     This is free software with ABSOLUTELY NO WARRANTY.
7#
8#  This program is free software; you can redistribute it and/or modify
9#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2, or (at your option)
11#  any later version.
12#
13#  This program is distributed in the hope that it will be useful
14#  but WITHOUT ANY WARRANTY; without even the implied warranty of
15#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16#  GNU General Public License for more details.
17#
18#  You should have received a copy of the GNU General Public License
19#  along with this program; if not, write to the Free Software
20#  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
21#  02111-1307, USA
22#
23#  This file must be encoded in EUC-JP encoding
24
25package xps;
26use strict;
27use English;
28require 'util.pl';
29require 'gfilter.pl';
30require 'ooo.pl';
31require 'msofficexml.pl';
32
# File-scoped state shared by the subs below.

# Path of the 'unzip' command as reported by util::checkcmd(); assigned in
# status() and used when building the unzip command lines.
my $unzippath;

# Options placed before the archive name when a single member is extracted;
# assigned in status().
my @unzipopts;

# NOTE(review): not referenced anywhere in the visible code -- confirm
# before removing.
my $utfconvpath;
36
# Report the media type this filter handles.
#
# Returns the single string 'application/vnd.ms-xpsdocument' (a one-element
# list in list context, the string itself in scalar context).
sub mediatype() {
    return 'application/vnd.ms-xpsdocument';
}
42
# Report whether this filter can be used in the current environment.
#
# Side effects: stores the result of util::checkcmd('unzip') in the
# file-scoped $unzippath and, when it is defined, sets @unzipopts to ("-p").
#
# Returns 'yes' or 'no':
#   - 'no'  when the unzip command was not found;
#   - 'yes' when unzip was found and the language is not "ja";
#   - for "ja", 'yes' only if $conf::NKF is 'module_nkf' and $NKF::VERSION
#     is at least 2.00, otherwise 'no'.
sub status() {
    $unzippath = util::checkcmd('unzip');
    return 'no' unless defined $unzippath;

    @unzipopts = ("-p");
    return 'yes' unless util::islang("ja");

    # Japanese documents additionally need the NKF module, version 2.00+.
    my $nkf_usable = ($conf::NKF eq 'module_nkf') && ($NKF::VERSION >= 2.00);
    return $nkf_usable ? 'yes' : 'no';
}
58
# Always returns 0.
# NOTE(review): presumably tells the filter framework that the output needs
# no further filtering pass -- confirm against the caller.
sub recursive() { return 0; }
62
# Always returns 0.
# NOTE(review): presumably means no character-code conversion should be
# applied before filter() runs -- confirm against the caller.
sub pre_codeconv() { return 0; }
66
# Always returns 0.
# NOTE(review): presumably means no character-code conversion should be
# applied after filter() runs -- confirm against the caller.
sub post_codeconv () { return 0; }
70
# Register the ".xps" file-name extension for the XPS media type.
#
# $magic - object providing an addFileExts(pattern, mediatype) method.
#
# Returns nothing (bare return).
sub add_magic ($) {
    my $magic = shift;

    # FIXME: very ad hoc.
    $magic->addFileExts('\\.xps$', 'application/vnd.ms-xpsdocument');

    return;
}
78
# Filter entry point: process the document metadata, then the page text.
#
# $orig_cfile   - accepted for interface compatibility; not used here.
# $contref      - reference to the scalar holding the XPS content; it is
#                 passed to both helpers, and filter_contentfile() replaces
#                 its value with the extracted text.
# $weighted_str - reference passed through to both helpers.
# $headings     - passed through to filter_contentfile().
# $fields       - passed through to both helpers.
#
# Returns undef.
sub filter ($$$$$) {
    my ($orig_cfile, $contref, $weighted_str, $headings, $fields) = @_;

    # Metadata must be handled first: filter_contentfile() overwrites
    # $$contref, after which the original archive bytes are gone.
    msofficexml::filter_metafile($contref, $weighted_str, $fields);
    filter_contentfile($contref, $weighted_str, $headings, $fields);

    # Deliberately "return undef" rather than a bare "return", so callers
    # in list context keep receiving a single undef value.
    return undef;
}
87
# Extract one member of a zip archive held in memory.
#
# $zipref       - reference to a scalar containing the archive bytes.
# $fname        - name of the member to extract.
# $unzipcontref - passed to util::syscmd() as the "stdout" destination.
#
# The archive is written to a temporary file, the unzip command
# ($unzippath with @unzipopts) is run on it, and the temporary file is then
# removed.  The sub evaluates to the result of that final unlink.
sub zip_read ($$$) {
    my ($zipref, $fname, $unzipcontref) = @_;

    # Start from a random 4-digit suffix and keep counting upwards until a
    # candidate name is found that is not an existing plain file.
    my $seq = int(rand(10000));
    my $tmpfile = util::tmpnam('NMZ.zip' . substr("000$seq", -4));
    while (-f $tmpfile) {
        $seq++;
        $tmpfile = util::tmpnam('NMZ.zip' . substr("000$seq", -4));
    }

    # Dump the archive to disk so that unzip can read it.
    {
        my $out = util::efopen("> $tmpfile");
        print $out $$zipref;
        util::fclose($out);
    }

    my %io_option = (
        "stdout"      => $unzipcontref,
        "stderr"      => "/dev/null",
        "mode_stdout" => "wb",
        "mode_stderr" => "wt",
    );
    my @cmd = ($unzippath, @unzipopts, $tmpfile, $fname);
    my $status = util::syscmd(command => \@cmd, option => \%io_option);

    unlink $tmpfile;
}
113
# Collect the names of the page parts contained in an XPS archive.
#
# $zipref   - reference to a scalar containing the archive bytes.
# $pagesref - reference to an array; every listed entry whose name starts
#             with "Documents/1/Pages/<digits>.fpage" is pushed onto it, in
#             listing order.
#
# The archive is written to a temporary file and listed with
# "unzip -Z -1"; the temporary file is removed afterwards.  Nothing is
# pushed when the command exits with a non-zero status.  The sub evaluates
# to the result of the final unlink.
sub get_pages_list ($$) {
    my ($zipref, $pagesref) = @_;
    my $tmpfile = util::tmpnam('NMZ.zip');
    {
        my $fh = util::efopen("> $tmpfile");
        print $fh $$zipref;
        util::fclose($fh);
    }

    my @unzipopts_getlist = ("-Z", "-1");
    my @cmd = ($unzippath, @unzipopts_getlist, $tmpfile);
    my $file_list;
    my $status = util::syscmd(
        command => \@cmd,
        option => {
            "stdout" => \$file_list,
            "stderr" => "/dev/null",
            "mode_stdout" => "wt",
            "mode_stderr" => "wt",
        },
    );

    if ($status == 0 && defined $file_list) {
        # Anchor with ^ under /m instead of requiring a preceding "\n":
        # a page entry on the very first line of the listing has no newline
        # in front of it and must not be skipped.
        while ($file_list =~ m!^(Documents/1/Pages/\d+\.fpage)!gm) {
            push(@$pagesref, $1);
        }
    }
    unlink $tmpfile;
}
143
# Replace the XPS archive in $$contref with the text of its pages.
#
# $contref      - reference to a scalar holding the archive bytes on entry;
#                 on return it holds the extracted, adjusted text.
# $weighted_str - reference passed to gfilter::line_adjust_filter() and to
#                 the debug output.
# $headings     - passed to gfilter::show_filter_debug_info().
# $fields       - passed to gfilter::show_filter_debug_info().
#
# The prototype lists four arguments, matching the four values unpacked
# below and the four passed by filter().
sub filter_contentfile ($$$$) {
    my ($contref, $weighted_str, $headings, $fields) = @_;
    my @pagefiles;
    my $xml = "";

    # Extract every page part, convert it, and keep only the text carried
    # in its UnicodeString attributes.
    get_pages_list($contref, \@pagefiles);
    foreach my $filename (@pagefiles) {
        my $xmlcont = '';
        xps::zip_read($contref, $filename, \$xmlcont);
        codeconv::toeuc(\$xmlcont);
        xps::get_document(\$xmlcont);
        $xml .= ' ' . $xmlcont;
    }

    ooo::remove_all_tag(\$xml);
    ooo::decode_entity(\$xml);

    # Code conversion for Japanese document.
    if (util::islang("ja")) {
        codeconv::normalize_eucjp(\$xml);
    }

    # From here on the caller's buffer holds text, not the archive.
    $$contref = $xml;

    gfilter::line_adjust_filter($contref);
    gfilter::line_adjust_filter($weighted_str);
    gfilter::white_space_adjust_filter($contref);
    gfilter::show_filter_debug_info($contref, $weighted_str,
                                    $fields, $headings);
}
174
# Extract the text of every <cp:keywords> element.
#
# $contref - reference to a scalar containing the XML text to search.
#
# Returns the element contents joined with single spaces, or an empty
# string when no such element is present.
sub get_keywords ($) {
    my ($contref) = @_;

    # Non-greedy so that each element is captured on its own even when
    # several sit on one line; /s so that content spanning several lines
    # is still matched.
    my @keywords = $$contref =~ m!<cp:keywords>(.*?)</cp:keywords>!gs;

    return join(" ", @keywords);
}
181
# Reduce page markup to the text carried in its UnicodeString attributes.
#
# $contref - reference to a scalar holding the markup; it is overwritten in
#            place with the double-quoted UnicodeString attribute values,
#            joined with single spaces (empty string when there are none).
sub get_document ($) {
    my ($contref) = @_;
    $$contref = join(" ", ($$contref =~ m!UnicodeString="([^"]*)"!g));
}
188
1891;
190