1#!/usr/bin/perl -w
2#
3# Bacula Systems - Philippe Chauvat
4# 27 jul 2012
5#
6# This script is designed to translate Bacula enterprise LaTeX2HTML
7# documentation files to something more "tunable" / "adaptable" from a CSS
8# point of view.
9#
# The HTML file to analyze and translate is given with -i
# Unless -o is given, the output is sent to ./<input file basename>.out
12#
13# - add some ids, class
# - re-order some pieces of HTML code
15#
16# This script is based on HTML::Parser module
17#
18# args:
19# -i: HTML input file
20# -o: HTML output file
21# -j: javascript directory
22# -c: css directory
23# -p: images (pictures) directory
24# -n: Manual name
25# -r: Source directory (ako part of -i arg)
26# -?: help / usage
27# -d: debug requested
28use HTML::Parser ;
29use HTML::TreeBuilder ;
30use HTML::PullParser ;
31use Getopt::Long ;
32use File::Basename ;
33use Data::Dumper ;
#
# Print the command-line synopsis on standard output, then terminate the
# script with exit status 1. This sub never returns to its caller.
# ======================
sub usage {
    # Single-quoted heredoc: the text is emitted verbatim, no interpolation
    print <<'END_OF_USAGE' ;
translatedoc.pl -i | --input html-source-file
 [ -o | --output html-destination-file ]
 [ -j | --javascript javascript-directory ]
 [ -c | --css css-directory ]
 [ -p | --pictures pictures-directory ]
 [ -n | --name manual_name ]
 [ -d | --debug ]
 [ -r | --source-directory the_original_root_directory ]
 [ --help | -? ]
END_OF_USAGE
    exit 1 ;
}
#
# Debug helper: when the global $debug flag is set, print an opening
# banner, call the dump() method of $what, then print a closing banner.
#   $what: object to dump; it must provide a dump() method
#   $msg:  label repeated in both banners
# Nothing is printed when $debug is false.
# ======================
sub debugdump {
    my ($what,$msg) = @_ ;
    return unless ($debug) ;
    my $separator = "\n===============================\n" ;
    print $separator . "Begin of $msg\n" ;
    $what->dump ;
    print $separator . "End of $msg\n\n" ;
}
#
# Command-line state, kept in package globals: $debug is read by
# debugdump() and $mytree receives the parsed document further down.
# $extractmenu and $sourcedir are declared here but not used by the code
# of this section.
our ($inputfile, $outputfile, $help, $debug, $mytree, $extractmenu,
     $picturesdir, $cssdir, $javascriptdir, $manualname, $sourcedir) ;
#
# Called without any argument: show the usage and stop
usage() if (@ARGV == 0) ;
#
# Map every command-line switch onto its variable
GetOptions(
    "input|i=s"            => \$inputfile,
    "output|o=s"           => \$outputfile,
    "pictures|p=s"         => \$picturesdir,
    "css|c=s"              => \$cssdir,
    "source-directory|r=s" => \$sourcedir,
    "javascript|j=s"       => \$javascriptdir,
    "name|n=s"             => \$manualname,
    "debug|d"              => \$debug,
    "help|?"               => \$help,
) or usage() ;
#
# Help explicitly requested, or mandatory input file not given
usage() if ($help || !defined $inputfile) ;

-e $inputfile or die "$inputfile does not exists.\n" ;

#
# Defaults for everything the caller left out.
# The output lands in the current directory, named after the input file.
$outputfile    = "./" . basename($inputfile) . ".out" unless (defined $outputfile) ;
$picturesdir   = "../images" unless (defined $picturesdir) ;
$cssdir        = "../css"    unless (defined $cssdir) ;
$javascriptdir = "../js"     unless (defined $javascriptdir) ;
$manualname    = "main"      unless (defined $manualname) ;
#
# Build HTML Tree of existing page.
# parse_file() returns undef when the file cannot be opened ($! tells
# why), so stop right away instead of carrying on with an empty tree.
$mytree = HTML::TreeBuilder->new ;
defined $mytree->parse_file($inputfile)
    or die "Unable to parse $inputfile: $!\n" ;
debugdump($mytree,"E1: ") ;
#
# Find the beginning of the content, which is also the point where to
# put the menu: the first <h1> or, failing that, the first <h2>.
$beginning_of_content = $mytree->look_down('_tag','h1')
    || $mytree->look_down('_tag','h2') ;
#
# No system call failed at this point, so $! is deliberately left out of
# the message (it would only show a stale, unrelated error text).
die "The only thing we could test is a <H1> / <H2> tags, which does not exist there...\n"
    unless ($beginning_of_content) ;
110#
111# Remove every 'dirty' lines
112# between <body> and <h1> tag
113# What is "before" the <h1> tag (between <body> and <h1>) is just dropped
114# my @lefts = $beginning_of_content->left() ;
115# foreach my $l (@lefts) {
116#     $l->detach_content() ;
117#     $l->delete_content() ;
118#     $l->detach() ;
119#     $l->delete() ;
120# }
121# #
122# # Remove Bacula community logo
123# if ($childlinks = $beginning_of_content->look_down('_tag','img','alt','\\includegraphics{bacula-logo.eps}')) {
124#     $childlinks->detach() ;
125#     $childlinks->delete() ;
126# }
127# # End remove Bacula logo
128# #
129# # Remove 'address' tag
130# if ($childlinks = $mytree->look_down('_tag','address')) {
131#     $childlinks->detach() ;
132#     $childlinks->delete() ;
133# }
134# # End remove address
135#
136# End remove dirty lines
137#
#
# Replace textregistered images with the HTML special char and drop the
# Bacula logo images. Both decisions are taken on the alt text.
my @images = $mytree->look_down('_tag','img') ;
foreach my $image (@images) {
    my $alttext = $image->attr('alt') ;
    # An <img> without alt attribute cannot be classified: leave it alone
    # (matching undef would also raise a warning under -w)
    next unless (defined $alttext) ;
    if ($alttext =~ /registe/) {
        # replace_with() puts the span where the image was and returns the
        # image, now detached from the tree and ready to be deleted
        $image->replace_with(
            HTML::Element->new_from_lol(['span', {'class' => 'expochar' }, '&reg;'])
        )->delete() ;
    }
    elsif ($alttext =~ /bacula.*-logo/) {
        # elsif: an image already replaced above must not be handled twice
        $image->detach() ;
        $image->delete() ;
    }
}
#
# Images referenced relative to the page ("./name") are looked up in the
# pictures directory: rewrite their src attribute accordingly.
@images = $mytree->look_down('_tag','img') ;
for my $picture (@images) {
    my $source = $picture->attr('src') ;
    next unless ($source =~ m{^\./}) ;
    # Drop the leading "./" and prefix the pictures directory instead
    $picture->attr('src', join('/', $picturesdir, substr($source, 2))) ;
}
#
# Declare the charset: a <meta http-equiv="content-type"> element is
# inserted right after the <title> element, when the page has one.
my $title = $mytree->look_down('_tag','title') ;
if ($title) {
    my $charset_meta = HTML::Element->new_from_lol(
        [ 'meta', { 'http-equiv' => 'content-type', 'content' => 'text/html; charset=utf-8' } ]
    ) ;
    $title->postinsert($charset_meta) ;
}
#
# Title page: everything below only happens when the page contains a
# <div class="author_info">.
my $author_info = $mytree->look_down('_tag', 'div','class','author_info') ;
if ($author_info) {
    # Tagline heading, inserted just before the author block
    my $tagline = HTML::Element->new_from_lol(
        [ 'h1', { 'align' => 'center' },
          [ 'div', { 'align' => 'center' },
            [ 'big', { 'class' => 'LARGE' }, "The Leading Open Source Backup Solution" ]
          ]
        ]
    ) ;
    $author_info->preinsert($tagline) ;
    #
    # Every <span class="MATH"> of the page is replaced by a registered sign
    my @math_spans = $mytree->look_down('_tag', 'span','class','MATH') ;
    if (@math_spans) {
        foreach my $math_span (@math_spans) {
            my $registered = HTML::Element->new_from_lol(['span', {'class' => 'expochar' }, '&reg;']) ;
            $math_span->preinsert($registered) ;
            $math_span->detach() ;
            $math_span->delete() ;
        }
        #
        # Adding the logo, right after the first navigation block.
        # This is only reached when at least one MATH span was found.
        my $navigation = $mytree->look_down('_tag', 'div','class','navigation') ;
        if ($navigation) {
            my $logo = HTML::Element->new_from_lol(
                [ 'div', {'align' => 'center'} ,
                  [ 'img', { 'src' => $picturesdir . '/borg-logo.png', 'id' => 'borg_logo','alt' => 'Bacula Community Logo' }]
                ]
            ) ;
            $navigation->postinsert($logo) ;
        }
    }
}
#
# Stylesheets referenced by a bare, letters-only file name are relocated
# into the css directory ($cssdir)
my @links = $mytree->look_down('_tag','link') ;
foreach my $stylesheet (@links) {
    my $href = $stylesheet->attr('href') ;
    next unless ($href =~ /^[a-zA-Z]+\.css/) ;
    $stylesheet->attr('href', "$cssdir/$href") ;
}
#
# Manage navigation: inside every <div class="navigation">, the icons are
# recognised by the beginning of their file name, tagged with a CSS class
# and pointed at the pictures directory.
# NOTE(review): the "index" and "content" icons get the same class as
# "next"; kept as is, confirm this is intended.
my @navigation_rules = (
    [ qr/^next.+/    => 'navigation-next' ],
    [ qr/^index.+/   => 'navigation-next' ],
    [ qr/^content.+/ => 'navigation-next' ],
    [ qr/^prev.+/    => 'navigation-prev' ],
    [ qr/^up.+/      => 'navigation-up'   ],
) ;
my @navs = $mytree->look_down('_tag','div','class','navigation') ;
foreach my $nav (@navs) {
    foreach my $icon ($nav->look_down('_tag','img')) {
        my $file = $icon->attr('src') ;
        foreach my $rule (@navigation_rules) {
            my ($pattern, $class) = @$rule ;
            next unless ($file =~ $pattern) ;
            $icon->attr('class', $class) ;
            $icon->attr('src', $picturesdir . '/' . $file) ;
            # The prefixes exclude each other: no other rule can match
            last ;
        }
    }
}
#
# Locate all <a name="..."> and append one line per named anchor to the
# list-of-anchors file of the current directory:
#   manual name <TAB> input file basename <TAB> anchor name
my @atags = $mytree->look_down('_tag','a') ;
my $anchorsfile = "list-of-anchors" ;
# Three-argument open on a lexical handle; $! tells why it failed
open(my $anchors_fh, '>>', $anchorsfile)
    or die "Unable to append to $anchorsfile file: $!\n" ;
foreach my $atag (@atags) {
    my $atagname = $atag->attr('name') ;
    # Tested on definedness and length so that an anchor named "0" is kept
    next unless (defined $atagname && length $atagname) ;
    print {$anchors_fh} $manualname . "\t" . basename($inputfile) . "\t" . $atagname . "\n"
        or die "Unable to write to $anchorsfile file: $!\n" ;
}
# Buffered write errors only show up when the handle is closed
close($anchors_fh) or die "Unable to close $anchorsfile file: $!\n" ;
#
# Send the tree to an HTML file.
# The three-argument open keeps the mode apart from the file name, so a
# name that starts with ">" or "&", or carries surrounding blanks, is
# used literally instead of altering the open mode.
if ($outputfile) {
    open(my $out_fh, '>', $outputfile)
        or die "Unable to create $outputfile: $!\n" ;
    print {$out_fh} $mytree->as_HTML("<>","\t",{})
        or die "Unable to write to $outputfile: $!\n" ;
    # Buffered write errors only show up when the handle is closed
    close($out_fh) or die "Unable to close $outputfile: $!\n" ;
}
else {
    # Only reached when the output name is a false value: the page goes
    # to standard output.
    # NOTE(review): this branch passes "" as entity set while the file
    # branch passes "<>"; kept as is, confirm the difference is intended.
    print $mytree->as_HTML("","\t",{}) ;
}

1;
262