#!/usr/local/bin/perl

=head1 NAME

 docbook2odf - DocBook to OpenDocument XSL Transformation utils
 Copyright (C) 2006 Roman Fordinal
 http://open.comsultia.com/docbook2odf/

=head1 LICENSE

 This program is free software; you can redistribute it and/or
 modify it under the terms of the GNU General Public License
 as published by the Free Software Foundation; either version 2
 of the License, or (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 GNU General Public License for more details.

=cut

use strict;
use warnings;
#use utf8;
#use encoding 'utf-8';
#use open ':utf8', ':std';

# depends on
use Cwd;
use File::Copy;
use File::Path;
use Getopt::Long;

# Optional modules: each flag records whether the module could be loaded.
my $USE_IMAGE_MAGICK = eval { require Image::Magick; 1 } ? 1 : 0;

# XSLT engine preference: Sablotron first, then LibXSLT; when neither is
# available xml_process() uses the external xsltproc command.
my $USE_SABLOTRON = 0;
my $USE_LIBXSLT   = 0;
if (eval { require XML::Sablotron; 1 })
{
    $USE_SABLOTRON = 1;
}
elsif (eval { require XML::LibXSLT; 1 })
{
    $USE_LIBXSLT = 1;
}

# NOTE(review): "require MODULE LIST" does not appear to be valid Perl, so
# this string eval most likely never compiles and $USE_ZIP stays 0 (the zip
# command is then always used). The probe is deliberately left unchanged
# here: the archive step must flush and close every generated file before
# Archive::Zip reads it, so the probe should only be repaired together with
# that step.
my $USE_ZIP = 0;
if (eval "require Archive::Zip qw( :ERROR_CODES :CONSTANTS )")
{
    $USE_ZIP = 1;
}

################################################################
# INITIALIZATION
################################################################


# initial variables
our $PATH         = Cwd::abs_path();               # working directory at startup
our $PATH_INSTALL = "/usr/share/docbook2odf/xsl";  # not final
# Prefer stylesheets from a source checkout (../xsl relative to the working
# directory), otherwise use the installed location.
our $PATH_XSL = (-e $PATH.'/../xsl/docbook.xsl')
    ? $PATH.'/../xsl'
    : $PATH_INSTALL;
my ($input, $output, $output_dir);
my ($help, $quiet, $verbose, $debug);
my ($params, $force);

our $program_Date='$Date: 2007-05-19 12:55:08 +0200 (So, 19 máj 2007) $';
our $program_Rev='$Rev: 235 $';
our $program_Author='$Author: fordinal $';
our $program_Id='$Id: docbook2odf 235 2007-05-19 10:55:08Z fordinal $';

# The version is "0.<revision>"; capture in list context so that a failed
# match cannot leak a stale $1.
my ($program_rev_number) = $program_Rev =~ /(\d+)/;
my $program_version      = "0.".(defined $program_rev_number ? $program_rev_number : 0);
my $program_name         = "docbook2odf ".$program_version;
my $program_description  = "a non-interactive docbook to opendocument convertor";
my $program_usage        = "docbookfile [-o opendocumentfile]";# [--params]";

# Unknown options are reported by Getopt::Long itself; processing continues
# regardless, so the return value is not inspected.
GetOptions
    (
        "o|output-file=s" => \$output,
        "output-dir=s"    => \$output_dir,
        "params=s"        => \$params,
        "xsl-file=s"      => \$PATH_XSL,
        "debug"           => \$debug,
        "q|quiet"         => \$quiet,
        "v|verbose"       => \$verbose,
        "h|help"          => \$help,
        "f|force"         => \$force,
    );
$input = $ARGV[0];

if ($help)
{
    print "$program_name, $program_description\n";
    print "Usage: docbook2odf $program_usage\n";
    print "\n";
    print <<"HELP";
Arguments:
  -o|--output-file  specify output opendocument filename.
  --output-dir      specify output directory.
  --params          list of params ( var=value,var2=value2 ).
  --xsl-file        use stylesheets (docbook.xsl, odf.xsl) from this directory.
  --debug           show debug messages.
  -q|--quiet        quiet (no output).
  -v|--verbose      verbose (extra output).
  -h|--help         print this help.
  -f|--force        overwrite existing output filename.
HELP
    exit;
}

if (!$input)
{
    print "$program_name, $program_description\n";
    print "Usage: docbook2odf $program_usage\n";
    print "Try `docbook2odf --help` for more information\n";
    exit;
}



##################################################################################
# START
##################################################################################

my $output_file=$output;
if (!$output)
{
    $output_file=$input;
    # if I run this script from commandline
    # the output filename is in current workdir
    # otherwise in directory of input filename (docbook)
    if ($ENV{'TERM'} && !$output_dir)
    {
        $output_file=~s|^.*/||;
        $output_file=$PATH.'/'.$output_file;
    }
    elsif ($output_dir)
    {
        $output_file=~s|^.*/||;
    }
    else
    {
        # output directory is in input file directory
    }
    # ".od" is completed to .odt/.odp/.ods/.odm once the content type is known
    $output_file=~s/\.(docbook|db|xml)$//;
    $output_file.=".od";
}
if ($output_dir)
{
    $output_dir=~s|/$||;
}
elsif ($output_file=~s|^(.*/)||)
{
    # split "dir/name" into directory and bare file name
    $output_dir=$1;
    $output_dir=~s|/$||;
}
else
{
    $output_dir=$PATH;
}

my $input_file=$input;
my $input_dir;
if ($input_file=~s|^(.*/)||)
{
    $input_dir=$1;
    $input_dir=~s|/$||;
}
else
{
    $input_dir='.';
}


# program information
if ($verbose)
{
    print "$program_name, $program_description\n";
}

# input / output files
if ($verbose)
{
    # the "?" stands for the type letter that is only appended when the
    # output name is derived from the input name
    my $suffix_hint = $output ? '' : '?';
    print "\n";
    print "input file: \"$input\"\n";
    print "output file: \"$output_dir/$output_file$suffix_hint\"\n";
    print "stylesheets: \"$PATH_XSL\"\n";
}



##################################################################################
# TEMPORARY DIRECTORY
##################################################################################

# create a temporary directory
#my $TEMP=$output_dir.'/'.$output_file.'.temp';
my $TEMP='/tmp/docbook2odf-'.$$.'-'.$output_file.'.tmp';
print "Creating TEMP directory ($TEMP)\n" if $debug;
rmtree $TEMP if -e $TEMP; # delete TEMP directory if exists
mkpath $TEMP;
mkpath $TEMP.'/Pictures';
mkpath $TEMP.'/META-INF';
mkpath $TEMP.'/process';


# Write $data to $path in binary mode and die with a message naming the
# file when opening, writing or closing fails. Closing right away makes sure
# the data is on disk before a later step reads the file.
sub write_output_file
{
    my ($path, $data) = @_;
    $data = '' unless defined $data;
    open(my $fh, '>', $path) or die "cannot open '$path' for writing: $!\n";
    binmode($fh);
    print {$fh} $data or die "cannot write '$path': $!\n";
    close($fh) or die "cannot close '$path': $!\n";
    return;
}



##################################################################################
# TRANSFORMATION
##################################################################################
print "XSL transformation\n" if $debug;

# DOCBOOK -> ODF (one big xml)

# parse params: "var=value,var2=value2" becomes (var, value, var2, value2).
# Split each pair on the first "=" only, so values may contain "=", and
# always push a value so names and values can never get out of step.
my @params_arr;
foreach my $param (split(/,/, defined $params ? $params : ''))
{
    my ($name, $value) = split(/=/, $param, 2);
    next unless defined $name && length $name;
    push @params_arr, $name, (defined $value ? $value : '');
}

# scalar() matters: xml_process may return a list of lines in list context
my $XML_DOC = $input;
my $XSL = $PATH_XSL.'/docbook.xsl';
write_output_file($TEMP.'/process/full.xml', scalar(xml_process($XSL, $XML_DOC, @params_arr)));

# MIMETYPE
write_output_file($TEMP.'/mimetype', 'application/vnd.oasis.opendocument.text');



##################################################################################
# SPLIT
##################################################################################

# odf.xsl is run once per package part over the intermediate document
$XML_DOC = $TEMP.'/process/full.xml';
$XSL = $PATH_XSL.'/odf.xsl';

# MANIFEST
write_output_file($TEMP.'/META-INF/manifest.xml', scalar(xml_process($XSL, $XML_DOC, 'part'=>'manifest')));

# META
write_output_file($TEMP.'/meta.xml', scalar(xml_process($XSL, $XML_DOC, 'part'=>'meta')));

# STYLES
write_output_file($TEMP.'/styles.xml', scalar(xml_process($XSL, $XML_DOC, 'part'=>'styles')));

# CONTENT (kept in memory; it is post-processed before being written)
my $content = xml_process($XSL, $XML_DOC, 'part'=>'content');
#utf8::encode($content);
if ($debug)
{
    # keep the unprocessed content for inspection
    write_output_file($TEMP.'/process/content.xml', $content);
}

print "\n" if $debug;



##################################################################################
# POSTPROCESSING
##################################################################################

$content = '' unless defined $content;

# Report a fatal error: return to the start directory, remove the temporary
# directory (kept when debugging) and die with the given message.
my $abort = sub
{
    my $message = shift;
    chdir($PATH);
    rmtree $TEMP unless $debug;
    die $message;
};

# post processing of content
{
    print "content postprocess\n" if $debug;
    # copy pictures into TEMP directory

    # Every xlink:href value is replaced by a numbered placeholder, one per
    # loop pass, and the final values are substituted back afterwards.
    my @uris;
    my $i=1;
    while ($content=~s|<([\w:]+)([^<]*?)(xlink:href)="(.*?)"|<$1$2xlink:href=<!TMPHREF-$i!>|)
    {
        my ($tag, $href, $uri) = ($1, $3, $4);

        print "-postprocessing $href\[$i]='$uri' in tag '$tag'\n" if $debug;

        # links on anything but images are kept unchanged
        if ($tag ne "draw:image")
        {
            $uris[$i]=$uri;
            $i++;
            next;
        }

        # extension = text after the last dot of the last path component;
        # a URI without one gets a package name without extension
        my ($ext) = $uri =~ m{\.([^./]+)$};

        # relative image paths are relative to the input document
        my $source = ($uri=~/^\//) ? $uri : $input_dir."/".$uri;

        my $filename=sprintf("%07d",$i);
        $filename.='.'.$ext if defined $ext;
        $uris[$i]='Pictures/'.$filename;
        my $dest=$TEMP.'/Pictures/'.$filename;
        print "-copy '$source'->'$dest'\n" if $debug;
        copy($source,$dest) or warn "cannot copy '$source' to '$dest': $!\n";
        $i++;
    }
    $content=~s|<!TMPHREF-(\d+)!>|"$uris[$1]"|g;

    # Resolve "function:NAME:(ARGUMENT)" markers found in the content.
    # Unknown functions are replaced by nothing.
    while($content=~s|function:([\w:\-]+):\((.*?)\)|<!TMP!>|)
    {
        my ($function, $data) = ($1, $2);
        my $replacement = '';
        print "function='$function' data='$data'\n" if $debug;
        if ($function eq "getimage-width" || $function eq "getimage-height")
        {
            my $want_width = ($function eq "getimage-width");
            $data=$input_dir."/".$data unless $data=~/^\//;
            if ($USE_IMAGE_MAGICK)
            {
                my $p = Image::Magick->new;
                my $status = $p->Read($data);
                warn "cannot read image '$data': $status\n" if "$status";
                my $pixels = $p->Get($want_width ? 'columns' : 'height');
                $pixels = 0 unless defined $pixels;
                #751mm=284px*2.644 196mm=74px
                $replacement = ($pixels*0.02644)."cm";
                print "output='$replacement'\n" if $debug;
            }
            else
            {
                my ($width, $height) = img_dimmensions($data);
                print "output='$width'\n" if $debug;
                print "output='$height'\n" if $debug;
                $replacement = $want_width ? $width : $height;
            }
        }
        $content=~s|<!TMP!>|$replacement|;
    }

    # convert alternative nbsp character to ODF spaces
    # NOTE(review): this is a character class, so it matches runs of \xC2
    # and/or \x82 taken individually rather than the two-byte sequence, and
    # length() counts both. Whether $content holds bytes or characters at
    # this point depends on the XSLT backend, so the pattern is left as it
    # was - confirm the intended encoding before tightening it.
    $content=~s|([\xC2\x82]+)|'<text:s text:c="'.length($1).'"/>'|eg;
}
print "\n" if $debug;

# Close the shared bareword handle in case an earlier part of the script
# left a generated file open on it; buffered data would otherwise be missing
# when the files are archived.
{
    no warnings;
    close(HND);
}

# Write content.xml and close it, so it is complete before it is archived.
my $content_path = $TEMP.'/content.xml';
open(my $content_fh, '>', $content_path)
    or $abort->("cannot open '$content_path' for writing: $!\n");
binmode($content_fh);
print {$content_fh} $content
    or $abort->("cannot write '$content_path': $!\n");
close($content_fh)
    or $abort->("cannot close '$content_path': $!\n");



##################################################################################
# ZIPPING
##################################################################################

# when --output-file is not defined
# then I run autodetection of document type
$output_file.=do
{
    ($content=~/<office:text/) ? 't' :
    ($content=~/<office:presentation/) ? 'p' :
    ($content=~/<office:spread/) ? 's' :
    'm'
} unless $output;

# Resolve the target against the start directory: the zip command below
# runs from inside the temporary directory, where a relative path would
# point to the wrong place.
require File::Spec;
my $target = File::Spec->rel2abs($output_dir.'/'.$output_file, $PATH);

if (-e $target && !$force)
{
    rmtree $TEMP;
    die "file $output_dir/$output_file exists\n";
}

if (!$debug)
{
    rmtree $TEMP.'/process';
}

# zipping directory
# NOTE(review): neither archiver below is told to store "mimetype" first and
# uncompressed; check against the OpenDocument packaging rules if strict
# consumers reject the result.
print "zipping directory '$TEMP' (PWD='$PATH')\n" if $debug;
my $zip_error;
if ($USE_ZIP || eval { require Archive::Zip; 1 })
{
    print "using Archive::Zip\n" if $debug;
    my $zip = Archive::Zip->new();
    my $status = $zip->addTree($TEMP);
    $status = $zip->writeToFileNamed($target) if $status == Archive::Zip::AZ_OK();
    $zip_error = "Archive::Zip failed (status $status) writing $target"
        if $status != Archive::Zip::AZ_OK();
}
else
{
    print "using zip command\n" if $debug;
    # zip updates an existing archive in place, which would keep stale
    # entries when overwriting with --force; start from a clean file
    if (-e $target && !unlink($target))
    {
        $zip_error = "cannot remove existing $target: $!";
    }
    elsif (!chdir($TEMP))
    {
        $zip_error = "cannot change to directory $TEMP: $!";
    }
    else
    {
        # list form: no shell is involved, so names need no quoting
        system('zip', '-rq', $target, glob('*'));
        $zip_error = "zip command failed (status $?) writing $target" if $? != 0;
        chdir($PATH);
    }
}
$abort->("$zip_error\n") if defined $zip_error;

print "\n" if $debug;

print "Saved $output_file\n" unless $quiet;



##################################################################################
# CLEANING
##################################################################################

if (!$debug)
{
    # delete temporary directory
    chdir($PATH);
    rmtree $TEMP;
}





##################################################################################
# FUNCTIONS
##################################################################################


# img_dimmensions($imgfile)
#
# Determine the size of an image with external tools (gif2png or
# anytopnm|pnmtopng to convert to PNG, then file(1) to read the size).
# Returns (width, height) as strings in centimetres, using 0.02644 cm per
# pixel, or ("3cm","3cm") when the size cannot be determined.
# The working directory is the same after the call as before it.
sub img_dimmensions
{
    my $imgfile = shift;
    my @fallback = ("3cm","3cm");
    return @fallback unless defined $imgfile && -f $imgfile;

    require Cwd;
    require File::Temp;

    # lower-case so the work copy can never collide with "img-file.png" on
    # a case-insensitive file system
    my $ext = ($imgfile =~ /\.(\w+)$/) ? lc($1) : '';
    my $tmpdir = File::Temp::tempdir("docbook2odf-$$-XXXXXX", TMPDIR => 1);
    my $startdir = Cwd::getcwd();
    my $data = '';

    # Copy to make sure the file name is reasonable (it is passed to a shell).
    my $workfile = "img-file.$ext";
    if (copy($imgfile,"$tmpdir/$workfile") && chdir($tmpdir))
    {
        # Convert to PNG.
        if ($ext eq "gif")
        {
            `gif2png -O $workfile`;
        }
        elsif ($ext ne "png")
        {
            `anytopnm $workfile 2> /dev/null| pnmtopng > img-file.png`;
        }

        # Get the image dimmensions.
        $data = `file img-file.png`;

        # go back, otherwise every later relative path would be resolved
        # against the (removed) temporary directory
        chdir($startdir) if defined $startdir;
    }

    rmtree($tmpdir);

    if (defined $data && $data =~ /PNG image data, (\d+) x (\d+)/)
    {
        return (($1*0.02644)."cm", ($2*0.02644)."cm");
    }
    return @fallback;
}



# xml_process($XSL, $XML_DOC, name => value, ...)
#
# Transform the document $XML_DOC with the stylesheet $XSL and return the
# result as one string. The remaining arguments are stylesheet parameters
# passed as plain strings. The engine is XML::Sablotron or XML::LibXSLT
# when one of them was loaded, otherwise the external xsltproc command.
sub xml_process
{
    my ($XSL, $XML_DOC, @xsl_params) = @_;

    print "param = @xsl_params\n" if $debug;

    # a name without a value gets an empty value
    push @xsl_params, '' if @xsl_params % 2;

    if ($USE_SABLOTRON)
    {
        print "xslt by sablotron\n" if $debug;
        my $sab = XML::Sablotron->new();
        my $situa = XML::Sablotron::Situation->new();
        for (my $n = 0; $n < @xsl_params; $n += 2)
        {
            $sab->addParam($situa, $xsl_params[$n], $xsl_params[$n+1]);
        }
        $sab->process($situa, $XSL, $XML_DOC, 'arg:/output');
        return $sab->getResultArg('arg:/output');
    }
    elsif ($USE_LIBXSLT)
    {
        print "xslt by libxslt\n" if $debug;
        my $xslt = XML::LibXSLT->new();
        my $stylesheet = $xslt->parse_stylesheet_file($XSL);
        my $results = $stylesheet->transform_file($XML_DOC,
            XML::LibXSLT::xpath_to_string(@xsl_params));
        return $stylesheet->output_string($results);
    }
    else
    {
        print "xslt by xsltproc\n" if $debug;
        # Build an argument list instead of a shell command line, so file
        # names and parameter values may contain spaces or shell
        # metacharacters.
        my @command = ('xsltproc');
        for (my $n = 0; $n < @xsl_params; $n += 2)
        {
            push @command, '--stringparam', $xsl_params[$n], $xsl_params[$n+1];
        }
        push @command, $XSL, $XML_DOC;

        my $pipe;
        if (!open($pipe, '-|', @command))
        {
            warn "cannot run xsltproc: $!\n";
            return '';
        }
        binmode($pipe);
        my $out = do { local $/; <$pipe> };
        close($pipe) or warn "xsltproc failed (status $?)\n";
        return defined $out ? $out : '';
    }
}


1;