#
# -*- Perl -*-
# $Id: xps.pl,v 1.1.2.1 2007-02-23 05:02:43 opengl2772 Exp $
# Copyright (C) 2007 Yukio USUDA,
#               2007 Namazu Project All rights reserved.
#     This is free software with ABSOLUTELY NO WARRANTY.
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either versions 2, or (at your option)
#  any later version.
#
#  This program is distributed in the hope that it will be useful
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
#  02111-1307, USA
#
#  This file must be encoded in EUC-JP encoding

package xps;
use strict;
use English;
require 'util.pl';
require 'gfilter.pl';
require 'ooo.pl';
require 'msofficexml.pl';

my $utfconvpath = undef;
my $unzippath = undef;    # path of the unzip command; set by status()
my @unzipopts;            # options used by zip_read(); set by status()

# Returns the list of media types handled by this filter.
sub mediatype() {
    return (
        'application/vnd.ms-xpsdocument',
    );
}

# Reports whether this filter can be used: 'yes' or 'no'.
#
# The filter needs the 'unzip' command.  When the language is Japanese
# it additionally needs the NKF perl module, version 2.00 or later.
# As a side effect $unzippath and @unzipopts are initialized.
sub status() {
    $unzippath = util::checkcmd('unzip');
    return 'no' unless defined $unzippath;

    # "-p" makes unzip write the extracted member to stdout.
    @unzipopts = ("-p");

    return 'yes' unless util::islang("ja");
    if (($conf::NKF eq 'module_nkf') && ($NKF::VERSION >= 2.00)) {
        return 'yes';
    }
    return 'no';
}

sub recursive() {
    return 0;
}

sub pre_codeconv() {
    return 0;
}

sub post_codeconv () {
    return 0;
}

# Registers the ".xps" file extension for the XPS media type on the
# given magic object.
sub add_magic ($) {
    my ($magic) = @_;

    # FIXME: very ad hoc.
    $magic->addFileExts('\\.xps$', 'application/vnd.ms-xpsdocument');
    return;
}

# Filter entry point.
#
#   $orig_cfile   - reference to the original file name (not used here)
#   $contref      - reference to the raw XPS (zip) content; replaced by
#                   the extracted text
#   $weighted_str - reference to the weighted string buffer
#   $headings     - reference to the headings buffer
#   $fields       - reference to the fields hash
#
# Always returns undef.
sub filter ($$$$$) {
    my ($orig_cfile, $contref, $weighted_str, $headings, $fields)
        = @_;

    # The meta data must be read first: filter_contentfile() overwrites
    # $$contref with the extracted text.
    msofficexml::filter_metafile($contref, $weighted_str, $fields);
    filter_contentfile($contref, $weighted_str, $headings, $fields);
    return undef;
}

# Writes the zip data referenced by $zipref into a fresh temporary file
# and returns the name of that file.  The caller has to unlink it.
sub _write_tmp_zip ($) {
    my ($zipref) = @_;

    # Pick a name which is not in use yet, starting from a random
    # four-digit suffix.
    my $tmpfile;
    my $uniqnumber = int(rand(10000));
    do {
        $tmpfile = util::tmpnam('NMZ.zip' . substr("000$uniqnumber", -4));
        $uniqnumber++;
    } while (-f $tmpfile);

    my $fh = util::efopen("> $tmpfile");
    print $fh $$zipref;
    util::fclose($fh);

    return $tmpfile;
}

# Extracts the member $fname from the zip data referenced by $zipref
# and stores its content through $unzipcontref.
#
# The exit status of unzip is not checked.  The return value is the
# result of removing the temporary file.
sub zip_read ($$$) {
    my ($zipref, $fname, $unzipcontref) = @_;
    my $tmpfile = _write_tmp_zip($zipref);

    my @cmd = ($unzippath, @unzipopts, $tmpfile, $fname);
    util::syscmd(
        command => \@cmd,
        option => {
            "stdout" => $unzipcontref,
            "stderr" => "/dev/null",
            "mode_stdout" => "wb",
            "mode_stderr" => "wt",
        },
    );
    return unlink $tmpfile;
}

# Appends the names of the page files (Documents/1/Pages/<n>.fpage)
# found in the zip data referenced by $zipref to @$pagesref, in the
# order in which the archive listing reports them.
#
# Nothing is appended when the listing command fails.  The return value
# is the result of removing the temporary file.
sub get_pages_list ($$) {
    my ($zipref, $pagesref) = @_;
    my $tmpfile = _write_tmp_zip($zipref);

    # "-Z -1" lists the member names only, one per line.
    my @unzipopts_getlist = ("-Z", "-1");
    my @cmd = ($unzippath, @unzipopts_getlist, $tmpfile);
    my $file_list;
    my $status = util::syscmd(
        command => \@cmd,
        option => {
            "stdout" => \$file_list,
            "stderr" => "/dev/null",
            "mode_stdout" => "wt",
            "mode_stderr" => "wt",
        },
    );
    if ($status == 0 && defined $file_list) {
        # Anchor at the start of each line so that an entry on the very
        # first line of the listing is found as well.
        push(@$pagesref,
             $file_list =~ m!^(Documents/1/Pages/\d+\.fpage)!gm);
    }
    return unlink $tmpfile;
}

# Replaces the XPS (zip) data in $$contref by the text of all pages.
#
#   $contref      - reference to the XPS data; receives the text
#   $weighted_str - reference to the weighted string buffer
#   $headings     - reference to the headings buffer
#   $fields       - reference to the fields hash
sub filter_contentfile ($$$$) {
    my ($contref, $weighted_str, $headings, $fields) = @_;
    my @pagefiles;
    my $xml = "";

    get_pages_list($contref, \@pagefiles);
    foreach my $filename (@pagefiles) {
        my $xmlcont = '';
        xps::zip_read($contref, $filename, \$xmlcont);
        codeconv::toeuc(\$xmlcont);
        xps::get_document(\$xmlcont);
        $xml .= ' ' . $xmlcont;
    }

    ooo::remove_all_tag(\$xml);
    ooo::decode_entity(\$xml);

    # Code conversion for Japanese document.
    if (util::islang("ja")) {
        codeconv::normalize_eucjp(\$xml);
    }

    $$contref = $xml;

    gfilter::line_adjust_filter($contref);
    gfilter::line_adjust_filter($weighted_str);
    gfilter::white_space_adjust_filter($contref);
    gfilter::show_filter_debug_info($contref, $weighted_str,
                                    $fields, $headings);
}

# Returns the contents of all <cp:keywords> elements found in
# $$contref, joined by a single space.
sub get_keywords ($) {
    my ($contref) = @_;

    # Non-greedy, so that several elements are captured one by one;
    # /s, so that a value may span several lines.
    my @keywordstmp = $$contref =~ m!<cp:keywords>(.*?)</cp:keywords>!gs;
    return join(" ", @keywordstmp);
}

# Replaces $$contref by the values of all UnicodeString="..."
# attributes it contains, joined by a single space.
sub get_document ($) {
    my ($contref) = @_;
    my @documents = $$contref =~ m!UnicodeString="([^"]*)"!g;
    $$contref = join(" ", @documents);
}

1;