1#!/usr/local/bin/perl
2=head1 NAME
3
4	docbook2odf - DocBook to OpenDocument XSL Transformation utils
5	Copyright (C) 2006 Roman Fordinal
6	http://open.comsultia.com/docbook2odf/
7
8=head1 LICENSE
9
10	This program is free software; you can redistribute it and/or
11	modify it under the terms of the GNU General Public License
12	as published by the Free Software Foundation; either version 2
13	of the License, or (at your option) any later version.
14
15	This program is distributed in the hope that it will be useful,
16	but WITHOUT ANY WARRANTY; without even the implied warranty of
17	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18	GNU General Public License for more details.
19
20=cut
21
22use strict;
23#use utf8;
24#use encoding 'utf-8';
25#use open ':utf8', ':std';
26
27# depends on
28use Cwd;
29use File::Copy;
30use File::Path;
31use Getopt::Long;
32
33# variable depends
34my $USE_IMAGE_MAGICK = 0;
35if (eval "require Image::Magick")
36{
37	$USE_IMAGE_MAGICK = 1;
38}
39
40my $USE_SABLOTRON = 0;
41my $USE_LIBXSLT = 0;
42if (eval "require XML::Sablotron;")
43{
44	$USE_SABLOTRON = 1;
45}
46elsif (eval "require XML::LibXSLT;")
47{
48	$USE_LIBXSLT = 1;
49}
50
51my $USE_ZIP = 0;
52if (eval "require Archive::Zip qw( :ERROR_CODES :CONSTANTS )")
53{
54	$USE_ZIP = 1;
55}
56
57################################################################
58# INITIALIZATION
59################################################################
60
61
62# initial variables
63our $PATH=Cwd::abs_path();
64our $PATH_INSTALL="/usr/share/docbook2odf/xsl"; # not final
65our $PATH_XSL = do
66{
67	(-e $PATH.'/../xsl/docbook.xsl') ? $PATH.'/../xsl' :
68	$PATH_INSTALL
69};
70my ($input, $output, $output_dir);
71my ($help, $quiet, $verbose, $debug);
72my ($params, $force);
73
74our $program_Date='$Date: 2007-05-19 12:55:08 +0200 (So, 19 máj 2007) $';
75our $program_Rev='$Rev: 235 $';
76our $program_Author='$Author: fordinal $';
77our $program_Id='$Id: docbook2odf 235 2007-05-19 10:55:08Z fordinal $';
78
79$program_Rev=~/(\d+)/;
80my $program_version="0.".$1;
81my $program_name="docbook2odf ".$program_version;
82my $program_description="a non-interactive docbook to opendocument convertor";
83my $program_usage="docbookfile [-o opendocumentfile]";# [--params]";
84
85my $result = GetOptions
86	(
87		"o|output-file=s"  => \$output,
88		"output-dir=s"   => \$output_dir,
89		"params=s"       => \$params,
90		"xsl-file=s"     => \$PATH_XSL,
91		"debug"          => \$debug,
92		"quiet"          => \$quiet,
93		"verbose"        => \$verbose,
94		"help"           => \$help,
95		"f|force"        => \$force,
96	);
97my $input = $ARGV[0];
98
99if ($help)
100{
101	print "$program_name, $program_description\n";
102	print "Usage: docbook2odf $program_usage\n";
103	print "\n";
104	print <<"HELP";
105Arguments:
106  -o|--output-file    specify output opendocument filename.
107  --output-dir        specify output directory.
108  --params            list of params ( var=value,var2=value2 ).
109  --xsl-file          use this xsl stylesheet instead.
110  --debug             show debug messages.
111  -q|--quiet             quiet (no output).
112  -v|--verbose           verbose (extra output).
113  -h|--help              print this help.
114  -f|--force          overwrite existing output filename.
115HELP
116	exit;
117}
118
119if (!$input)
120{
121	print "$program_name, $program_description\n";
122	print "Usage: docbook2odf $program_usage\n";
123	print "Try `docbook2odf --help` for more information\n";
124	exit;
125}
126
127
128
129##################################################################################
130# START
131##################################################################################
132
133my $output_file=$output;
134if (!$output)
135{
136	$output_file=$input;
137	# if I run this script from commandline
138	# the output filename is in current workdir
139	# otherwise in directory of input filename (docbook)
140	if ($ENV{'TERM'} && !$output_dir)
141	{
142		$output_file=~s|^.*/||;
143		$output_file=$PATH.'/'.$output_file;
144	}
145	elsif ($output_dir)
146	{
147		$output_file=~s|^.*/||;
148	}
149	else
150	{
151		# output directory is in input file directory
152	}
153	$output_file=~s/\.(docbook|db|xml)$//;
154	$output_file.=".od";
155}
156if ($output_dir)
157{
158	$output_dir=~s|/$||;
159}
160elsif ($output_file=~s|^(.*/)||)
161{
162	$output_dir=$1;
163	$output_dir=~s|/$||;
164}
165else
166{
167	$output_dir=$PATH;
168}
169
170my $input_file=$input;
171my $input_dir;
172if ($input_file=~s|^(.*/)||)
173{
174	$input_dir=$1;
175	$input_dir=~s|/$||;
176}
177else
178{
179	$input_dir='.';
180}
181
182
183# program information
184if ($verbose)
185{
186	print "$program_name, $program_description\n";
187}
188
189# input / output files
190if ($verbose)
191{
192	print "\n";
193	print "input file:   \"$input\"\n";
194	print "output file:  \"$output_dir/$output_file?\"\n";
195	print "stylesheets:  \"$PATH_XSL\"\n";
196}
197
198
199
200##################################################################################
201# TEMPORARY DIRECTORY
202##################################################################################
203
204# create a temporary directory
205#my $TEMP=$output_dir.'/'.$output_file.'.temp';
206my $TEMP='/tmp/docbook2odf-'.$$.'-'.$output_file.'.tmp';
207print "Creating TEMP directory ($TEMP)\n" if $debug;
208rmtree $TEMP if -e $TEMP; # delete TEMP directory if exists
209mkpath $TEMP;
210mkpath $TEMP.'/Pictures';
211mkpath $TEMP.'/META-INF';
212mkpath $TEMP.'/process';
213
214
215
216##################################################################################
217# TRANSFORMATION
218##################################################################################
219print "XSL transformation\n" if $debug;
220
221# DOCBOOK -> ODF (one big xml)
222
223# parse params;
224my @params_arr;
225foreach my $param(split(',',$params))
226{
227	foreach (split('=',$param))
228	{
229		push @params_arr,$_;
230	}
231}
232
233my $XML_DOC = $input;
234my $XSL = $PATH_XSL.'/docbook.xsl';
235open (HND, '>'.$TEMP.'/process/full.xml');
236print HND xml_process($XSL, $XML_DOC,@params_arr);
237
238# MIMETYPE
239open (HND, '>'.$TEMP.'/mimetype');
240print HND 'application/vnd.oasis.opendocument.text';
241close HND;
242
243
244
245##################################################################################
246# SPLIT
247##################################################################################
248
249$XML_DOC = $TEMP.'/process/full.xml';
250$XSL = $PATH_XSL.'/odf.xsl';
251
252# MANIFEST
253open (HND, '>'.$TEMP.'/META-INF/manifest.xml');
254print HND xml_process($XSL, $XML_DOC, 'part'=>'manifest');
255
256# META
257open (HND, '>'.$TEMP.'/meta.xml');
258print HND xml_process($XSL, $XML_DOC, 'part'=>'meta');
259
260# STYLES
261open (HND, '>'.$TEMP.'/styles.xml');
262binmode(HND);
263print HND xml_process($XSL, $XML_DOC, 'part'=>'styles');
264
265# CONTENT
266my $content = xml_process($XSL, $XML_DOC, 'part'=>'content');
267#utf8::encode($content);
268if ($debug)
269{
270	open (HND, '>'.$TEMP.'/process/content.xml');
271	binmode(HND);
272	print HND $content;
273}
274
275print "\n" if $debug;
276
277
278
279##################################################################################
280# POSTPROCESSING
281##################################################################################
282
283do # post processing of content
284{
285	print "content postprocess\n" if $debug;
286	# copy pictures into TEMP directory
287
288	my @uris;
289	my $i=1;
290	while ($content=~s|<([\w:]+)([^<]*?)(xlink:href)="(.*?)"|<$1$2xlink:href=<!TMPHREF-$i!>|)
291	{
292		my $tag=$1;
293		my $oth=$2;
294		my $href=$3;
295		my $uri=$4;
296
297		print "-postprocessing $href\[$i]='$uri' in tag '$tag'\n" if $debug;
298
299		if ($tag ne "draw:image")
300		{
301			$uris[$i]=$4;
302			$i++;
303			next;
304		}
305
306		my $ext=$uri;$ext=~s|^.*\.||;
307		if ($uri=~/^\//)
308		{
309			# uri processing
310		}
311		else
312		{
313			# uri processing
314			$uri=$input_dir."/".$uri;
315		}
316
317		my $filename=sprintf("%07d",$i);
318		$uris[$i]='Pictures/'.$filename.".".$ext;
319		my $dest=$TEMP.'/Pictures/'.$filename.'.'.$ext;
320		print "-copy '$uri'->'$dest'\n" if $debug;
321		copy($uri,$dest);
322		$i++;
323	}
324	$content=~s|<!TMPHREF-(\d+)!>|"$uris[$1]"|g;
325
326	while($content=~s|function:([\w:\-]+):\((.*?)\)|<!TMP!>|)
327	{
328		my $function=$1;
329		my $data=$2;
330		print "function='$function' data='$data'\n" if $debug;
331		if ($USE_IMAGE_MAGICK)
332		{
333			if ($function eq "getimage-width")
334			{
335				my $p = new Image::Magick;
336				$data=$input_dir."/".$data unless $data=~/^\//;
337				$p->Read($data);
338				my $width=($p->Get('columns')*0.02644)."cm";
339				print "output='$width'\n" if $debug;
340				$content=~s|<!TMP!>|$width|;
341				next;
342			}
343			if ($function eq "getimage-height")
344			{
345				my $p = new Image::Magick;
346				$data=$input_dir."/".$data unless $data=~/^\//;
347				$p->Read($data);
348				my $height=($p->Get('height')*0.02644)."cm";
349				print "output='$height'\n" if $debug;
350				$content=~s|<!TMP!>|$height|;
351				next;
352			}
353		}
354		elsif ($function eq "getimage-width" || $function eq "getimage-height")
355		{
356			$data=$input_dir."/".$data unless $data=~/^\//;
357			my ($width, $height) = img_dimmensions($data);
358			print "output='$width'\n" if $debug;
359			print "output='$height'\n" if $debug;
360			($function=~/width/) and $content=~s|<!TMP!>|$width|;
361			($function=~/height/) and $content=~s|<!TMP!>|$height|;
362			next;
363		}
364		#751mm=284px*2.644 196mm=74px
365		$content=~s|<!TMP!>||;
366	}
367
368	# convert alternative nbsp character to ODF spaces
369	$content=~s|([\xC2\x82]+)|'<text:s text:c="'.length($1).'"/>'|eg;
370};
371print "\n" if $debug;
372
373open (HND, '>'.$TEMP.'/content.xml');
374binmode(HND);
375print HND $content;
376
377
378
379##################################################################################
380# ZIPPING
381##################################################################################
382
383# when --output-file is not defined
384# then I run autodetection of document type
385$output_file.=do
386{
387	($content=~/<office:text/) ? 't' :
388	($content=~/<office:presentation/) ? 'p' :
389	($content=~/<office:spread/) ? 's' :
390	'm'
391} unless $output;
392
393if (-e $output_dir.'/'.$output_file && !$force)
394{
395	rmtree $TEMP;
396	die "file $output_dir/$output_file exists\n";
397}
398
399if (!$debug)
400{
401	rmtree $TEMP.'/process';
402}
403
404# zipping directory
405print "zipping directory '$TEMP' (PWD='$PATH')\n" if $debug;
406my $zip;
407if ($USE_ZIP)
408{
409	print "using Archive::Zip\n" if $debug;
410	$zip = Archive::Zip->new();
411	$zip->addTree($TEMP);
412	$zip->writeToFileNamed($output_dir.'/'.$output_file);
413}
414else
415{
416	print "using zip command\n" if $debug;
417	chdir($TEMP);
418	my $out=`zip -rq "$output_dir/$output_file" *`;
419}
420
421print "\n" if $debug;
422
423print "Saved $output_file\n" unless $quiet;
424
425
426
427##################################################################################
428# CLEANING
429##################################################################################
430
431if (!$debug)
432{
433	# delete temporary directory
434	print "delete temporary directory='$TEMP' (PWD='$PATH')\n" if $debug;
435	chdir '..';
436	rmtree $TEMP;
437}
438
439
440
441
442
443##################################################################################
444# FUNCTIONS
445##################################################################################
446
447
448sub img_dimmensions
449{
450	my $imgfile = shift;
451	my $tmpdir = "/tmp/docbook2odf-$$-".int(100*rand());
452	my $ext = '';
453	mkdir($tmpdir);
454	($imgfile =~ /\.(\w+)$/) and $ext = $1;
455
456	# Copy to make sure the file name is reasonable.
457	copy($imgfile,"$tmpdir/img-file.$ext");
458	$imgfile = "img-file.$ext";
459
460	# Convert to PNG.
461	chdir($tmpdir);
462	if ($ext eq "gif")
463	{
464		`gif2png -O $imgfile`;
465	}
466	elsif ($ext ne "png")
467	{
468		`anytopnm $imgfile 2> /dev/null| pnmtopng > img-file.png`;
469	}
470	$imgfile = "img-file.png";
471
472	# Get the image dimmensions.
473	my $data = `file $imgfile`;
474
475	rmtree($tmpdir);
476
477	($data =~ /PNG image data, (\d+) x (\d+)/) and return (($1*0.02644)."cm", ($2*0.02644)."cm");
478	return ("3cm","3cm");
479}
480
481
482
483sub xml_process
484{
485	my $XSL = shift;
486	my $XML_DOC = shift;
487
488	print "param = @_\n" if $debug;
489
490	if ($USE_SABLOTRON)
491	{
492		print "xslt by sablotron\n" if $debug;
493		my $sab = new XML::Sablotron();
494		my $situa = new XML::Sablotron::Situation();
495		while (@_)
496		{
497			my $name = shift;
498			my $val  = shift;
499			$sab->addParam($situa, $name, $val);
500		}
501		$sab->process($situa, $XSL, $XML_DOC, 'arg:/output');
502		return $sab->getResultArg('arg:/output');
503	}
504 	elsif ($USE_LIBXSLT)
505 	{
506 		print "xslt by libxslt\n" if $debug;
507		my $xslt = XML::LibXSLT->new();
508		my $stylesheet = $xslt->parse_stylesheet_file($XSL);
509		my @params;
510 		while (@_)
511 		{
512 			my $name = shift;
513 			my $val  = shift;
514			push @params, $name;
515			push @params, $val;
516 		}
517		my $results = $stylesheet->transform_file($XML_DOC,
518			XML::LibXSLT::xpath_to_string(@params));
519		return $stylesheet->output_string($results);
520 	}
521	else
522	{
523		print "xslt by xsltproc\n" if $debug;
524		my $PARAM = '';
525		while (@_)
526		{
527			my $name = shift;
528			my $val  = shift;
529			$PARAM .= " --stringparam $name $val ";
530		}
531		return `xsltproc $PARAM $XSL $XML_DOC`;
532	}
533}
534
535
5361;