1 //C- -*- C++ -*-
2 //C- -------------------------------------------------------------------
3 //C- DjVuLibre-3.5
4 //C- Copyright (c) 2002 Leon Bottou and Yann Le Cun.
5 //C- Copyright (c) 2001 AT&T
6 //C-
7 //C- This software is subject to, and may be distributed under, the
8 //C- GNU General Public License, either Version 2 of the license,
9 //C- or (at your option) any later version. The license should have
10 //C- accompanied the software or you may obtain a copy of the license
11 //C- from the Free Software Foundation at http://www.fsf.org .
12 //C-
13 //C- This program is distributed in the hope that it will be useful,
14 //C- but WITHOUT ANY WARRANTY; without even the implied warranty of
15 //C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 //C- GNU General Public License for more details.
17 //C-
18 //C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library from
19 //C- Lizardtech Software. Lizardtech Software has authorized us to
20 //C- replace the original DjVu(r) Reference Library notice by the following
21 //C- text (see doc/lizard2002.djvu and doc/lizardtech2007.djvu):
22 //C-
23 //C- ------------------------------------------------------------------
24 //C- | DjVu (r) Reference Library (v. 3.5)
25 //C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved.
26 //C- | The DjVu Reference Library is protected by U.S. Pat. No.
27 //C- | 6,058,214 and patents pending.
28 //C- |
29 //C- | This software is subject to, and may be distributed under, the
30 //C- | GNU General Public License, either Version 2 of the license,
31 //C- | or (at your option) any later version. The license should have
32 //C- | accompanied the software or you may obtain a copy of the license
33 //C- | from the Free Software Foundation at http://www.fsf.org .
34 //C- |
35 //C- | The computer code originally released by LizardTech under this
36 //C- | license and unmodified by other parties is deemed "the LIZARDTECH
37 //C- | ORIGINAL CODE." Subject to any third party intellectual property
38 //C- | claims, LizardTech grants recipient a worldwide, royalty-free,
39 //C- | non-exclusive license to make, use, sell, or otherwise dispose of
40 //C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the
41 //C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU
42 //C- | General Public License. This grant only confers the right to
43 //C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to
44 //C- | the extent such infringement is reasonably necessary to enable
45 //C- | recipient to make, have made, practice, sell, or otherwise dispose
46 //C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to
47 //C- | any greater extent that may be necessary to utilize further
48 //C- | modifications or combinations.
49 //C- |
50 //C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY
51 //C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
52 //C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF
53 //C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
54 //C- +------------------------------------------------------------------
55
56 #ifdef HAVE_CONFIG_H
57 # include "config.h"
58 #endif
59 #if NEED_GNUG_PRAGMAS
60 # pragma implementation
61 #endif
62
63 #include "DjVuDocument.h"
64 #include "DjVmDir.h"
65 #include "ByteStream.h"
66 #include "IFFByteStream.h"
67 #include "DjVuText.h"
68 #include "DjVuImage.h"
69 #include "GString.h"
70 #include "GOS.h"
71 #include "GURL.h"
72 #include "DjVuMessage.h"
73
74 #include "common.h"
75
76 #include <sys/stat.h>
77 #include <time.h>
78
79
// True when the program runs without command line argument, i.e. as a
// CGI executable. Errors are then reported as an HTTP/HTML response.
static bool cgi = false;
// True when the request method is "HEAD".
// The serving functions then stop after the HTTP headers.
static bool head = false;
82
83
// Description of the current request, filled by main().
struct DJVUSERVEGlobal
{
  // Globals that need static initialization
  // are grouped here to work around broken compilers.
  GUTF8String pathinfo;        // Environment variable PATH_INFO (CGI mode).
  GUTF8String pathtranslated;  // PATH_TRANSLATED, or the command line argument.
  GUTF8String requestmethod;   // REQUEST_METHOD, or "GET" on the command line.
  GUTF8String querystring;     // Environment variable QUERY_STRING (CGI mode).
};
93
g(void)94 static DJVUSERVEGlobal& g(void)
95 {
96 static DJVUSERVEGlobal g;
97 return g;
98 }
99
100
101 static void
usage(void)102 usage(void)
103 {
104 DjVuPrintErrorUTF8(
105 #ifdef DJVULIBRE_VERSION
106 "DJVUSERVE --- DjVuLibre-" DJVULIBRE_VERSION "\n"
107 #endif
108 "Extracts hidden text from Djvu files\n"
109 "\n"
110 "Usage: djvuserve [<djvufile>[/<djvmid>]\n"
111 "Outputs the specified <djvufile> with valid Content-Type,\n"
112 "Content-Length, and Expire HTTP headers. Bundled multipage DjVu\n"
113 "documents are accessed as indirect document using the <djvmid>\n"
114 "syntax. Specifying a <djvmid> of <index> generates an indirect page\n"
115 "directory pointing to the other component files.\n"
116 "This program is designed to be used as a CGI executable.\n"
117 "It uses environment variable PATH_TRANSLATED when executed\n"
118 "without arguments.\n\n" );
119 exit(10);
120 }
121
// Returns the three letter English abbreviation of week day d,
// where 0 stands for Sunday and 6 for Saturday.
// Any other value yields "???".
static const char *
day_name(int d)
{
  static const char *const names[7] = {
    "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
  };
  const bool valid = !(d < 0 || d > 6);
  return valid ? names[d] : "???";
}
132
// Returns the three letter English abbreviation of month d,
// where 0 stands for January and 11 for December.
// Any other value yields "???".
static const char*
month_name(int d)
{
  static const char *const names[12] = {
    "Jan", "Feb", "Mar", "Apr", "May", "Jun",
    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
  };
  const bool valid = !(d < 0 || d > 11);
  return valid ? names[d] : "???";
}
144
145 void
fprintdate(FILE * f,const char * fmt,const time_t * tim)146 fprintdate(FILE *f, const char *fmt, const time_t *tim)
147 {
148 char ctim[128];
149 struct tm *ttim = gmtime(tim);
150 /* strftime(ctim, sizeof(ctim)-1, "%a, %d %b %Y %H:%M:%S GMT", ttim); */
151 sprintf(ctim,"%3s, %02d %3s %04d %02d:%02d:%02d GMT",
152 day_name(ttim->tm_wday), ttim->tm_mday,
153 month_name(ttim->tm_mon), 1900+ttim->tm_year,
154 ttim->tm_hour, ttim->tm_min, ttim->tm_sec);
155 fprintf(stdout, fmt, ctim);
156 }
157
158 void
headers(const struct stat * statbuf,const char * fname=0)159 headers(const struct stat *statbuf, const char *fname = 0)
160 {
161 fprintf(stdout,"Content-Type: image/x.djvu\n");
162 if (fname)
163 fprintf(stdout,"Content-Disposition: attachment; filename=\"%s\"\n", fname);
164 fprintf(stdout,"Content-Length: %ld\n", (long)statbuf->st_size);
165 time_t tim = time(0) + 360 * 24 * 3600;
166 fprintdate(stdout, "Last-Modified: %s\n", &statbuf->st_mtime);
167 fprintdate(stdout, "Expires: %s\n", &tim);
168 }
169
170 bool
is_djvu_file_bundled(GURL & pathurl)171 is_djvu_file_bundled(GURL &pathurl)
172 {
173 GP<ByteStream> in = ByteStream::create(pathurl,"rb");
174 GP<IFFByteStream> iff = IFFByteStream::create(in);
175 GUTF8String chkid;
176 iff->get_chunk(chkid);
177 // Make sure that this is a DjVu file.
178 if (chkid != "FORM:DJVU" &&
179 chkid != "FORM:DJVM" &&
180 chkid != "FORM:PM44" &&
181 chkid != "FORM:BM44" )
182 G_THROW("Corrupted DjVu file");
183 // Test if it is bundled
184 if (chkid == "FORM:DJVM")
185 {
186 while (iff->get_chunk(chkid) && chkid!="DIRM")
187 iff->close_chunk();
188 if (chkid == "DIRM")
189 {
190 GP<ByteStream> dirm = iff->get_bytestream();
191 if (dirm->read8() & 0x80)
192 return true;
193 }
194 }
195 return false;
196 }
197
198 void
djvuserver_file(GURL pathurl,bool bundled,bool download)199 djvuserver_file(GURL pathurl, bool bundled, bool download)
200 {
201 GNativeString fname = pathurl.NativeFilename();
202 struct stat statbuf;
203 if (stat((const char *)fname, &statbuf) < 0)
204 G_THROW(strerror(errno));
205
206 // Is this a bundled file?
207 if (is_djvu_file_bundled(pathurl) && !bundled)
208 {
209 // It is bundled
210 GUTF8String id = pathurl.name();
211 fprintf(stdout,"Location: %s/index.djvu", (const char*)id);
212 if (g().querystring.length())
213 fprintf(stdout,"?%s", (const char*)g().querystring);
214 fprintf(stdout,"\n\n");
215 return;
216 }
217 // Push the file
218 if (download)
219 headers(&statbuf, pathurl.fname());
220 else
221 headers(&statbuf);
222 if (head)
223 return;
224 fprintf(stdout,"\n");
225 fflush(stdout);
226 GP<ByteStream> in = ByteStream::create(pathurl,"rb");
227 GP<ByteStream> out = ByteStream::get_stdout("ab");
228 out->copy(*in);
229 }
230
231 void
djvuserver_directory(GURL pathurl)232 djvuserver_directory(GURL pathurl)
233 {
234 GNativeString fname = pathurl.NativeFilename();
235 struct stat statbuf;
236 if (stat((const char *)fname, &statbuf) < 0)
237 G_THROW(strerror(errno));
238 // Find the DIRM chunk directly (save time)
239 GP<ByteStream> temp;
240 GP<ByteStream> bsin = ByteStream::create(pathurl,"rb");
241 GP<DjVmDir> dir = DjVmDir::create();
242 {
243 GP<IFFByteStream> iffin = IFFByteStream::create(bsin);
244 GUTF8String chkid;
245 iffin->get_chunk(chkid);
246 if (chkid != "FORM:DJVM")
247 G_THROW( "This is not a multipage DjVu document" );
248 while (iffin->get_chunk(chkid) && chkid!="DIRM")
249 iffin->close_chunk();
250 if (chkid != "DIRM")
251 G_THROW( "This is not a new style bundled DjVu document" );
252 temp = iffin->get_bytestream();
253 dir->decode(temp);
254 if (! dir->is_bundled())
255 G_THROW( "This is not a bundled DjVu document" );
256 }
257 // Assemble index of indirect multipage file
258 GP<ByteStream> bsdir = ByteStream::create();
259 {
260 GP<IFFByteStream> iff = IFFByteStream::create(bsdir);
261 iff->put_chunk("FORM:DJVM",1);
262 iff->put_chunk("DIRM");
263 temp = iff->get_bytestream();
264 dir->encode(temp, false, false);
265 iff->close_chunk();
266 iff->close_chunk();
267 }
268 // HTTP output
269 statbuf.st_size = bsdir->tell();
270 headers(&statbuf);
271 if (head)
272 return;
273 bsdir->seek(0);
274 fprintf(stdout,"\n");
275 fflush(stdout);
276 GP<ByteStream> out = ByteStream::get_stdout("ab");
277 out->copy(*bsdir);
278 }
279
280 void
djvuserver_component(GURL pathurl,GUTF8String id)281 djvuserver_component(GURL pathurl, GUTF8String id)
282 {
283 GNativeString fname = pathurl.NativeFilename();
284 struct stat statbuf;
285 if (stat((const char *)fname, &statbuf) < 0)
286 G_THROW(strerror(errno));
287 // Find the DIRM chunk directly (save time)
288 GP<ByteStream> temp;
289 GP<ByteStream> bsin = ByteStream::create(pathurl,"rb");
290 GP<DjVmDir> dir = DjVmDir::create();
291 {
292 GP<IFFByteStream> iffin = IFFByteStream::create(bsin);
293 GUTF8String chkid;
294 iffin->get_chunk(chkid);
295 if (chkid != "FORM:DJVM")
296 G_THROW( "This is not a multipage DjVu document" );
297 while (iffin->get_chunk(chkid) && chkid!="DIRM")
298 iffin->close_chunk();
299 if (chkid != "DIRM")
300 G_THROW( "This is not a new style bundled DjVu document" );
301 temp = iffin->get_bytestream();
302 dir->decode(temp);
303 if (! dir->is_bundled())
304 G_THROW( "This is not a bundled DjVu document" );
305 }
306 // Find the file record
307 GP<DjVmDir::File> frec = dir->id_to_file(id);
308 if (!frec)
309 G_THROW( "Cannot locate requested component file" );
310 if (!frec->size || !frec->offset)
311 G_THROW( "Corrupted DjVu directory" );
312
313 // HTTP output
314 statbuf.st_size = frec->size + 4;
315 headers(&statbuf);
316 if (head)
317 return;
318 fprintf(stdout,"\n");
319 fflush(stdout);
320 GP<ByteStream> out = ByteStream::get_stdout("ab");
321 out->writall("AT&T", 4);
322 bsin->seek(frec->offset);
323 out->copy(*bsin, frec->size);
324 }
325
326
327 bool
search_cgi_arg(const char * name)328 search_cgi_arg(const char *name)
329 {
330 const char *s = g().querystring;
331 int l = strlen(name);
332 if (*s == '?')
333 s += 1;
334 while (*s)
335 {
336 if (! strncmp(s, name, l))
337 if (s[l]=='&' || s[l]=='=' || s[l]==0)
338 return true;
339 while (*s && *s != '&')
340 s += 1;
341 if (*s == '&')
342 s += 1;
343 }
344 return false;
345 }
346
347
348 int
main(int argc,char ** argv)349 main(int argc, char ** argv)
350 {
351 DJVU_LOCALE;
352 G_TRY
353 {
354 // Obtain path
355 bool bundled = false;
356 bool download = false;
357 if (argc == 1)
358 {
359 cgi = true;
360 g().pathinfo = GNativeString(getenv("PATH_INFO"));
361 g().pathtranslated = GNativeString(getenv("PATH_TRANSLATED"));
362 if (! g().pathinfo)
363 usage();
364 if (! g().pathtranslated)
365 G_THROW("No path information");
366 g().requestmethod = GNativeString(getenv("REQUEST_METHOD"));
367 g().querystring = GUTF8String(getenv("QUERY_STRING"));
368 if (search_cgi_arg("bundled"))
369 bundled = true;
370 if (search_cgi_arg("download") || search_cgi_arg("bundle"))
371 bundled = download = true;
372 }
373 else if (argc == 2)
374 {
375 cgi = false;
376 g().pathtranslated = GNativeString(argv[1]);
377 g().requestmethod = "GET";
378 }
379 if (! g().pathtranslated)
380 usage();
381 head = false;
382 if (g().requestmethod == "HEAD")
383 head = true;
384 else if (g().requestmethod != "GET")
385 G_THROW("Only serve HEAD and GET requests");
386 // Do it.
387 GURL pathurl = GURL::Filename::UTF8(g().pathtranslated);
388 if (pathurl.is_file())
389 {
390 djvuserver_file(pathurl, bundled, download);
391 }
392 else
393 {
394 GUTF8String id = pathurl.name();
395 pathurl = pathurl.base();
396 if (! pathurl.is_file())
397 G_THROW("File not found");
398 if (id != "index" && id != "index.djvu")
399 djvuserver_component(pathurl, id);
400 else if (bundled)
401 djvuserver_file(pathurl, bundled, download);
402 else
403 djvuserver_directory(pathurl);
404 }
405 }
406 G_CATCH(ex)
407 {
408 if (cgi)
409 {
410 GUTF8String cause = DjVuMessageLite::LookUpUTF8(ex.get_cause());
411 fprintf(stdout,"Status: 400 %s\n", (const char*)cause);
412 fprintf(stdout,"Content-Type: text/html\n\n");
413 fprintf(stdout,
414 "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML 2.0//EN\">\n"
415 "<HTML><HEAD><TITLE>400 Error</TITLE></HEAD><BODY>\n"
416 "<H1>%s</H1>The requested URL '%s' cannot be processed.<P>\n"
417 #ifdef DJVULIBRE_VERSION
418 "<HR><ADDRESS>djvuserve/DjVuLibre-" DJVULIBRE_VERSION "</ADDRESS>\n"
419 #endif
420 "</BODY></HTML>\n",
421 (const char *) cause,
422 (const char *) g().pathinfo );
423 }
424 else
425 {
426 ex.perror();
427 }
428 exit(10);
429 }
430 G_ENDCATCH;
431 // return code zero
432 return 0;
433 }
434