1 //========================================================================
2 //
3 // pdftohtml.cc
4 //
5 // Copyright 2005 Glyph & Cog, LLC
6 //
7 //========================================================================
8 
9 #include <aconf.h>
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include "gmem.h"
13 #include "gmempp.h"
14 #include "parseargs.h"
15 #include "gfile.h"
16 #include "GString.h"
17 #include "GlobalParams.h"
18 #include "PDFDoc.h"
19 #include "HTMLGen.h"
20 #include "Error.h"
21 #include "ErrorCodes.h"
22 #include "config.h"
23 
24 //------------------------------------------------------------------------
25 
26 static GBool createIndex(char *htmlDir);
27 
28 //------------------------------------------------------------------------
29 
30 static int firstPage = 1;
31 static int lastPage = 0;
32 static double zoom = 1;
33 static int resolution = 150;
34 static GBool noFonts = gFalse;
35 static GBool skipInvisible = gFalse;
36 static GBool allInvisible = gFalse;
37 static char ownerPassword[33] = "\001";
38 static char userPassword[33] = "\001";
39 static GBool quiet = gFalse;
40 static char cfgFileName[256] = "";
41 static GBool printVersion = gFalse;
42 static GBool printHelp = gFalse;
43 
44 static ArgDesc argDesc[] = {
45   {"-f",       argInt,      &firstPage,     0,
46    "first page to convert"},
47   {"-l",       argInt,      &lastPage,      0,
48    "last page to convert"},
49   {"-z",       argFP,       &zoom,          0,
50    "initial zoom level (1.0 means 72dpi)"},
51   {"-r",       argInt,      &resolution,    0,
52    "resolution, in DPI (default is 150)"},
53   {"-nofonts", argFlag, &noFonts,           0,
54    "do not extract embedded fonts"},
55   {"-skipinvisible", argFlag, &skipInvisible, 0,
56    "do not draw invisible text"},
57   {"-allinvisible",  argFlag, &allInvisible,  0,
58    "treat all text as invisible"},
59   {"-opw",     argString,   ownerPassword,  sizeof(ownerPassword),
60    "owner password (for encrypted files)"},
61   {"-upw",     argString,   userPassword,   sizeof(userPassword),
62    "user password (for encrypted files)"},
63   {"-q",       argFlag,     &quiet,         0,
64    "don't print any messages or errors"},
65   {"-cfg",     argString,   cfgFileName,    sizeof(cfgFileName),
66    "configuration file to use in place of .xpdfrc"},
67   {"-v",       argFlag,     &printVersion,  0,
68    "print copyright and version info"},
69   {"-h",       argFlag,     &printHelp,     0,
70    "print usage information"},
71   {"-help",    argFlag,     &printHelp,     0,
72    "print usage information"},
73   {"--help",   argFlag,     &printHelp,     0,
74    "print usage information"},
75   {"-?",       argFlag,     &printHelp,     0,
76    "print usage information"},
77   {NULL}
78 };
79 
80 //------------------------------------------------------------------------
81 
// Output callback passed to HTMLGen::convertPage() for both the HTML
// and the PNG stream.  [file] is the destination FILE*, passed as an
// opaque pointer; [size] bytes starting at [data] are written to it.
// Returns the number of bytes fwrite() reports as written.
static int writeToFile(void *file, const char *data, int size) {
  FILE *out = (FILE *)file;
  size_t nWritten = fwrite(data, 1, size, out);
  return (int)nWritten;
}
85 
main(int argc,char * argv[])86 int main(int argc, char *argv[]) {
87   PDFDoc *doc;
88   char *fileName;
89   char *htmlDir;
90   GString *ownerPW, *userPW;
91   HTMLGen *htmlGen;
92   GString *htmlFileName, *pngFileName, *pngURL;
93   FILE *htmlFile, *pngFile;
94   int pg, err, exitCode;
95   GBool ok;
96 
97   exitCode = 99;
98 
99   // parse args
100   fixCommandLine(&argc, &argv);
101   ok = parseArgs(argDesc, &argc, argv);
102   if (!ok || argc != 3 || printVersion || printHelp) {
103     fprintf(stderr, "pdftohtml version %s [www.xpdfreader.com]\n", xpdfVersion);
104     fprintf(stderr, "%s\n", xpdfCopyright);
105     if (!printVersion) {
106       printUsage("pdftohtml", "<PDF-file> <html-dir>", argDesc);
107     }
108     goto err0;
109   }
110   fileName = argv[1];
111   htmlDir = argv[2];
112 
113   // read config file
114   globalParams = new GlobalParams(cfgFileName);
115   if (quiet) {
116     globalParams->setErrQuiet(quiet);
117   }
118   globalParams->setupBaseFonts(NULL);
119   globalParams->setTextEncoding("UTF-8");
120 
121   // open PDF file
122   if (ownerPassword[0] != '\001') {
123     ownerPW = new GString(ownerPassword);
124   } else {
125     ownerPW = NULL;
126   }
127   if (userPassword[0] != '\001') {
128     userPW = new GString(userPassword);
129   } else {
130     userPW = NULL;
131   }
132   doc = new PDFDoc(fileName, ownerPW, userPW);
133   if (userPW) {
134     delete userPW;
135   }
136   if (ownerPW) {
137     delete ownerPW;
138   }
139   if (!doc->isOk()) {
140     exitCode = 1;
141     goto err1;
142   }
143 
144   // check for copy permission
145   if (!doc->okToCopy()) {
146     error(errNotAllowed, -1,
147 	  "Copying of text from this document is not allowed.");
148     exitCode = 3;
149     goto err1;
150   }
151 
152   // get page range
153   if (firstPage < 1) {
154     firstPage = 1;
155   }
156   if (lastPage < 1 || lastPage > doc->getNumPages()) {
157     lastPage = doc->getNumPages();
158   }
159 
160   // create HTML directory
161   if (makeDir(htmlDir, 0755)) {
162     error(errIO, -1, "Couldn't create HTML output directory '{0:s}'",
163 	  htmlDir);
164     exitCode = 2;
165     goto err1;
166   }
167 
168   // set up the HTMLGen object
169   htmlGen = new HTMLGen(resolution);
170   if (!htmlGen->isOk()) {
171     exitCode = 99;
172     goto err1;
173   }
174   htmlGen->setZoom(zoom);
175   htmlGen->setDrawInvisibleText(!skipInvisible);
176   htmlGen->setAllTextInvisible(allInvisible);
177   htmlGen->setExtractFontFiles(!noFonts);
178   htmlGen->startDoc(doc);
179 
180   // convert the pages
181   for (pg = firstPage; pg <= lastPage; ++pg) {
182     htmlFileName = GString::format("{0:s}/page{1:d}.html", htmlDir, pg);
183     pngFileName = GString::format("{0:s}/page{1:d}.png", htmlDir, pg);
184     if (!(htmlFile = openFile(htmlFileName->getCString(), "wb"))) {
185       error(errIO, -1, "Couldn't open HTML file '{0:t}'", htmlFileName);
186       delete htmlFileName;
187       delete pngFileName;
188       goto err2;
189     }
190     if (!(pngFile = openFile(pngFileName->getCString(), "wb"))) {
191       error(errIO, -1, "Couldn't open PNG file '{0:t}'", pngFileName);
192       fclose(htmlFile);
193       delete htmlFileName;
194       delete pngFileName;
195       goto err2;
196     }
197     pngURL = GString::format("page{0:d}.png", pg);
198     err = htmlGen->convertPage(pg, pngURL->getCString(), htmlDir,
199 			       &writeToFile, htmlFile,
200 			       &writeToFile, pngFile);
201     delete pngURL;
202     fclose(htmlFile);
203     fclose(pngFile);
204     delete htmlFileName;
205     delete pngFileName;
206     if (err != errNone) {
207       error(errIO, -1, "Error converting page {0:d}", pg);
208       exitCode = 2;
209       goto err2;
210     }
211   }
212 
213   // create the master index
214   if (!createIndex(htmlDir)) {
215     exitCode = 2;
216     goto err2;
217   }
218 
219   exitCode = 0;
220 
221   // clean up
222  err2:
223   delete htmlGen;
224  err1:
225   delete doc;
226   delete globalParams;
227  err0:
228 
229   // check for memory leaks
230   Object::memCheck(stderr);
231   gMemReport(stderr);
232 
233   return exitCode;
234 }
235 
createIndex(char * htmlDir)236 static GBool createIndex(char *htmlDir) {
237   GString *htmlFileName;
238   FILE *html;
239   int pg;
240 
241   htmlFileName = GString::format("{0:s}/index.html", htmlDir);
242   html = openFile(htmlFileName->getCString(), "w");
243   delete htmlFileName;
244   if (!html) {
245     error(errIO, -1, "Couldn't open HTML file '{0:t}'", htmlFileName);
246     return gFalse;
247   }
248 
249   fprintf(html, "<html>\n");
250   fprintf(html, "<body>\n");
251   for (pg = firstPage; pg <= lastPage; ++pg) {
252     fprintf(html, "<a href=\"page%d.html\">page %d</a><br>\n", pg, pg);
253   }
254   fprintf(html, "</body>\n");
255   fprintf(html, "</html>\n");
256 
257   fclose(html);
258 
259   return gTrue;
260 }
261