#!/usr/bin/perl -w
#
# Bacula Systems - Philippe Chauvat
# 27 jul 2012
#
# This script is designed to translate Bacula enterprise LaTeX2HTML
# documentation files to something more "tunable" / "adaptable" from a CSS
# point of view.
#
# The input is an HTML file to analyze and translate.
# Unless -o is given, the output is automatically sent to
# ./<basename of the input file>.out
#
# - add some ids, class
# - re-order some piece of HTML code
#
# This script is based on the HTML::TreeBuilder / HTML::Element modules.
#
# Side effect: every <a name="..."> found in the input is appended to the
# file "list-of-anchors" in the current directory, one line per anchor:
#   manual-name <TAB> input-file-basename <TAB> anchor-name
#
# args:
# -i: HTML input file
# -o: HTML output file
# -j: javascript directory
# -c: css directory
# -p: images (pictures) directory
# -n: Manual name
# -r: Source directory (ako part of -i arg)
# -?: help / usage
# -d: debug requested
use strict ;
use warnings ;
use HTML::Parser ;
use HTML::TreeBuilder ;
use HTML::PullParser ;
use Getopt::Long ;
use File::Basename ;
use Data::Dumper ;
#
# Args to Vars
# Declared before the subs so that debugdump() sees $debug under strict.
# $extractmenu, $javascriptdir and $sourcedir are set/declared but not read
# anywhere in this file.
our($inputfile,$outputfile,$help,$debug,$mytree,$extractmenu,$picturesdir,
    $cssdir,$javascriptdir,$manualname,$sourcedir) ;
#
# Print the command line synopsis on STDOUT and exit with status 1
# ======================
sub usage {
    print "translatedoc.pl -i | --input html-source-file
    [ -o | --output html-destination-file ]
    [ -j | --javascript javascript-diretory ]
    [ -c | --css css-directory ]
    [ -p | --pictures pictures-directory ]
    [ -n | --name manual_name ]
    [ -d | --debug ]
    [ -r | --source-directory the_original_root_directory ]
    [ --help | -? ]\n" ;
    exit 1 ;
}
#
# Send message to output in case of debug only
# ======================
# $what: an object providing a dump() method (here: the HTML tree)
# $msg:  label printed in the "Begin of" / "End of" banners
sub debugdump {
    my ($what,$msg) = @_ ;
    return unless $debug ;
    print "\n===============================\nBegin of $msg\n" ;
    $what->dump ;
    print "\n===============================\nEnd of $msg\n\n" ;
    return ;
}
#
# Usage in case of missing arguments
usage() unless @ARGV ;
#
# Input file / Output file
GetOptions("input|i=s" => \$inputfile,
           "output|o=s" => \$outputfile,
           "pictures|p=s" => \$picturesdir,
           "css|c=s" => \$cssdir,
           "source-directory|r=s" => \$sourcedir,
           "javascript|j=s" => \$javascriptdir,
           "name|n=s" => \$manualname,
           "debug|d" => \$debug,
           "help|?" => \$help) or usage() ;
usage() if ($help) ;
usage() unless (defined $inputfile) ;

die "$inputfile does not exists.\n" unless -e $inputfile ;
#
# Default values for everything the caller did not provide
$outputfile    = "./" . basename($inputfile) . ".out" unless defined $outputfile ;
$picturesdir   = "../images"                          unless defined $picturesdir ;
$cssdir        = "../css"                             unless defined $cssdir ;
$javascriptdir = "../js"                              unless defined $javascriptdir ;
$manualname    = "main"                               unless defined $manualname ;
#
# Build HTML Tree of existing page
$mytree = HTML::TreeBuilder->new ;
# The file may exist and still not be readable: do not continue on an empty tree
defined( $mytree->parse_file($inputfile) )
    or die "Unable to parse $inputfile: $!\n" ;
debugdump($mytree,"E1: ") ;
#
# Find the beginning of the content
# Which is also a point where to put
# the menu
my $beginning_of_content = $mytree->look_down('_tag','h1') ;
$beginning_of_content = $mytree->look_down('_tag','h2') unless ($beginning_of_content) ;
# No system call failed here, so $! is deliberately not part of the message
die "The only thing we could test is a <H1> / <H2> tags, which does not exist there...\n" unless($beginning_of_content) ;
#
# Remove every 'dirty' lines
# between <body> and <h1> tag
# What is "before" the <h1> tag (between <body> and <h1>) is just dropped
# (currently disabled)
# my @lefts = $beginning_of_content->left() ;
# foreach my $l (@lefts) {
#     $l->detach_content() ;
#     $l->delete_content() ;
#     $l->detach() ;
#     $l->delete() ;
# }
# #
# # Remove Bacula community logo
# if ($childlinks = $beginning_of_content->look_down('_tag','img','alt','\\includegraphics{bacula-logo.eps}')) {
#     $childlinks->detach() ;
#     $childlinks->delete() ;
# }
# # End remove Bacula logo
# #
# # Remove 'address' tag
# if ($childlinks = $mytree->look_down('_tag','address')) {
#     $childlinks->detach() ;
#     $childlinks->delete() ;
# }
# # End remove address
#
# End remove dirty lines
#
# Replace textregistered images with the HTML special char
# and drop the Bacula logo images
foreach my $image ($mytree->look_down('_tag','img')) {
    my $alttext = $image->attr('alt') ;
    # An <img> without alt attribute can match neither pattern
    next unless defined $alttext ;
    if ($alttext =~ /.*registe.*/) {
        $image->preinsert(HTML::Element->new_from_lol(['span', {'class' => 'expochar' }, '®'])) ;
        $image->detach() ;
        $image->delete() ;
    }
    elsif ($alttext =~ /.*bacula.*-logo.*/) {
        # elsif: an image removed above must not be detached/deleted twice
        $image->detach() ;
        $image->delete() ;
    }
}
#
# Relocate the remaining "./xxx" images into the pictures directory
# (the list is fetched again because some images were just deleted)
foreach my $image ($mytree->look_down('_tag','img')) {
    my $img = $image->attr('src') ;
    next unless defined $img ;
    if ($img =~ /^\.\//) {
        $img =~ s/\.\/// ;
        $img = $picturesdir . '/' . $img ;
        $image->attr('src',$img) ;
    }
}
#
# Add a content-type meta element right after <title>
if (my $title = $mytree->look_down('_tag','title')) {
    $title->postinsert(HTML::Element->new_from_lol(['meta',{ 'http-equiv' => 'content-type', 'content' => 'text/html; charset=utf-8' } ])) ;
}
#
# Title page only (the one holding <div class="author_info">)
if (my $author_info = $mytree->look_down('_tag', 'div','class','author_info')) {
    $author_info->preinsert(
        HTML::Element->new_from_lol(
            [ 'h1', { 'align' => 'center' },
              [ 'div', { 'align' => 'center' },
                [ 'big', { 'class' => 'LARGE' }, "The Leading Open Source Backup Solution" ]
              ]
            ]
        )) ;
    #
    # Adding the logo
    # NOTE(review): the logo is only inserted when at least one
    # <span class="MATH"> exists in the page - confirm this nesting is intended.
    if (my @regs = $mytree->look_down('_tag', 'span','class','MATH')) {
        # Every <span class="MATH"> is replaced by the registered sign
        foreach my $reg (@regs) {
            $reg->preinsert(HTML::Element->new_from_lol(['span', {'class' => 'expochar' }, '®'])) ;
            $reg->detach() ;
            $reg->delete() ;
        }
        if (my $navigation = $mytree->look_down('_tag', 'div','class','navigation')) {
            $navigation->postinsert(
                HTML::Element->new_from_lol(
                    [ 'div', {'align' => 'center'} ,
                      [ 'img', { 'src' => $picturesdir . '/borg-logo.png', 'id' => 'borg_logo','alt' => 'Bacula Community Logo' }]
                    ]
                )) ;
        }
    }
}
#
# Manage css to be located into the css directory
foreach my $css ($mytree->look_down('_tag','link')) {
    my $link = $css->attr('href') ;
    next unless defined $link ;
    if ($link =~ /^[a-zA-Z]+\.css/) {
        $css->attr('href', $cssdir . '/' . $link) ;
    }
}
#
# Manage navigation
# Each navigation image gets a CSS class chosen from the beginning of its
# file name and is relocated into the pictures directory.
# The prefixes are mutually exclusive, so at most one rule applies per image.
# NOTE(review): 'index' and 'content' images get the 'navigation-next' class,
# exactly as before - looks like a copy/paste, confirm against the stylesheet.
my @navigation_rules = (
    [ qr/^next.+/,    'navigation-next' ],
    [ qr/^index.+/,   'navigation-next' ],
    [ qr/^content.+/, 'navigation-next' ],
    [ qr/^prev.+/,    'navigation-prev' ],
    [ qr/^up.+/,      'navigation-up'   ],
) ;
foreach my $nav ($mytree->look_down('_tag','div','class','navigation')) {
    foreach my $image ($nav->look_down('_tag','img')) {
        my $img = $image->attr('src') ;
        next unless defined $img ;
        foreach my $rule (@navigation_rules) {
            my ($pattern,$class) = @$rule ;
            next unless $img =~ $pattern ;
            $image->attr('class',$class) ;
            $image->attr('src', $picturesdir . '/' . $img) ;
            last ;
        }
    }
}
#
# Locate all <a name="whatever_but_SECTION...">
# and append them to the list-of-anchors file
my $inputname = basename($inputfile) ;
open my $anchors_fh, '>>', 'list-of-anchors'
    or die "Unable to append to list-of-anchors file: $!\n" ;
foreach my $atag ($mytree->look_down('_tag','a')) {
    # Anchors with a missing or false name are skipped, as before
    if (my $atagname = $atag->attr('name')) {
        print {$anchors_fh} $manualname . "\t" . $inputname . "\t" . $atagname . "\n" ;
    }
}
# Buffered write errors only show up when the handle is closed
close $anchors_fh or die "Unable to write list-of-anchors file: $!\n" ;
#
# Send the tree to an HTML file
# (or to STDOUT when the output file name is empty, e.g. -o "")
# NOTE(review): the two branches do not pass the same first argument to
# as_HTML(); kept as found - confirm whether the difference is intended.
if ($outputfile) {
    # Three-argument open: the file name can never be read as an open mode
    open my $out_fh, '>', $outputfile or die "Unable to create $outputfile: $!\n" ;
    print {$out_fh} $mytree->as_HTML("<>","\t",{}) ;
    close $out_fh or die "Unable to write $outputfile: $!\n" ;
}
else {
    print $mytree->as_HTML("","\t",{}) ;
}

1;