1 //C-  -*- C++ -*-
2 //C- -------------------------------------------------------------------
3 //C- DjVuLibre-3.5
4 //C- Copyright (c) 2002  Leon Bottou and Yann Le Cun.
5 //C- Copyright (c) 2001  AT&T
6 //C-
7 //C- This software is subject to, and may be distributed under, the
8 //C- GNU General Public License, either Version 2 of the license,
9 //C- or (at your option) any later version. The license should have
10 //C- accompanied the software or you may obtain a copy of the license
11 //C- from the Free Software Foundation at http://www.fsf.org .
12 //C-
13 //C- This program is distributed in the hope that it will be useful,
14 //C- but WITHOUT ANY WARRANTY; without even the implied warranty of
15 //C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 //C- GNU General Public License for more details.
17 //C-
18 //C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library from
19 //C- Lizardtech Software.  Lizardtech Software has authorized us to
20 //C- replace the original DjVu(r) Reference Library notice by the following
21 //C- text (see doc/lizard2002.djvu and doc/lizardtech2007.djvu):
22 //C-
23 //C-  ------------------------------------------------------------------
24 //C- | DjVu (r) Reference Library (v. 3.5)
25 //C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved.
26 //C- | The DjVu Reference Library is protected by U.S. Pat. No.
27 //C- | 6,058,214 and patents pending.
28 //C- |
29 //C- | This software is subject to, and may be distributed under, the
30 //C- | GNU General Public License, either Version 2 of the license,
31 //C- | or (at your option) any later version. The license should have
32 //C- | accompanied the software or you may obtain a copy of the license
33 //C- | from the Free Software Foundation at http://www.fsf.org .
34 //C- |
35 //C- | The computer code originally released by LizardTech under this
36 //C- | license and unmodified by other parties is deemed "the LIZARDTECH
37 //C- | ORIGINAL CODE."  Subject to any third party intellectual property
38 //C- | claims, LizardTech grants recipient a worldwide, royalty-free,
39 //C- | non-exclusive license to make, use, sell, or otherwise dispose of
40 //C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the
41 //C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU
42 //C- | General Public License.   This grant only confers the right to
43 //C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to
44 //C- | the extent such infringement is reasonably necessary to enable
45 //C- | recipient to make, have made, practice, sell, or otherwise dispose
46 //C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to
47 //C- | any greater extent that may be necessary to utilize further
48 //C- | modifications or combinations.
49 //C- |
50 //C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY
51 //C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
52 //C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF
53 //C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
54 //C- +------------------------------------------------------------------
55 
56 #ifdef HAVE_CONFIG_H
57 # include "config.h"
58 #endif
59 #if NEED_GNUG_PRAGMAS
60 # pragma implementation
61 #endif
62 
63 #include "DjVuDocument.h"
64 #include "DjVmDir.h"
65 #include "ByteStream.h"
66 #include "IFFByteStream.h"
67 #include "DjVuText.h"
68 #include "DjVuImage.h"
69 #include "GString.h"
70 #include "GOS.h"
71 #include "GURL.h"
72 #include "DjVuMessage.h"
73 
74 #include "common.h"
75 
76 #include <sys/stat.h>
77 #include <time.h>
78 
79 
// Set by main(): true when the program runs without a command-line
// argument and takes its request from the CGI environment variables.
static bool cgi = false;
// Set by main(): true when the request method is "HEAD", in which case
// the serving functions emit the HTTP headers but no body.
static bool head = false;
82 
83 
84 struct DJVUSERVEGlobal
85 {
86   // Globals that need static initialization
87   // are grouped here to work around broken compilers.
88   GUTF8String pathinfo;
89   GUTF8String pathtranslated;
90   GUTF8String requestmethod;
91   GUTF8String querystring;
92 };
93 
g(void)94 static DJVUSERVEGlobal& g(void)
95 {
96   static DJVUSERVEGlobal g;
97   return g;
98 }
99 
100 
101 static void
usage(void)102 usage(void)
103 {
104    DjVuPrintErrorUTF8(
105 #ifdef DJVULIBRE_VERSION
106           "DJVUSERVE --- DjVuLibre-" DJVULIBRE_VERSION "\n"
107 #endif
108           "Extracts hidden text from Djvu files\n"
109           "\n"
110           "Usage: djvuserve [<djvufile>[/<djvmid>]\n"
111           "Outputs the specified <djvufile> with valid Content-Type,\n"
112           "Content-Length, and Expire HTTP headers.  Bundled multipage DjVu\n"
113           "documents are accessed as indirect document using the <djvmid>\n"
114           "syntax. Specifying a <djvmid> of <index> generates an indirect page\n"
115           "directory pointing to the other component files.\n"
116           "This program is designed to be used as a CGI executable.\n"
117           "It uses environment variable PATH_TRANSLATED when executed\n"
118           "without arguments.\n\n" );
119    exit(10);
120 }
121 
// Returns the three-letter English abbreviation of week day D, where
// 0 is Sunday and 6 is Saturday.  Returns "???" for any other value.
static const char *
day_name(int d)
{
   static const char *const names[] = {
     "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
   };
   if (d < 0 || d > 6)
     return "???";
   return names[d];
}
132 
// Returns the three-letter English abbreviation of month D, where
// 0 is January and 11 is December.  Returns "???" for any other value.
static const char*
month_name(int d)
{
   static const char *const names[] = {
     "Jan", "Feb", "Mar", "Apr", "May", "Jun",
     "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
   };
   if (d < 0 || d > 11)
     return "???";
   return names[d];
}
144 
145 void
fprintdate(FILE * f,const char * fmt,const time_t * tim)146 fprintdate(FILE *f, const char *fmt, const time_t *tim)
147 {
148   char ctim[128];
149   struct tm *ttim = gmtime(tim);
150   /* strftime(ctim, sizeof(ctim)-1, "%a, %d %b %Y %H:%M:%S GMT", ttim); */
151   sprintf(ctim,"%3s, %02d %3s %04d %02d:%02d:%02d GMT",
152 	  day_name(ttim->tm_wday), ttim->tm_mday,
153 	  month_name(ttim->tm_mon), 1900+ttim->tm_year,
154 	  ttim->tm_hour, ttim->tm_min, ttim->tm_sec);
155   fprintf(stdout, fmt, ctim);
156 }
157 
158 void
headers(const struct stat * statbuf,const char * fname=0)159 headers(const struct stat *statbuf, const char *fname = 0)
160 {
161   fprintf(stdout,"Content-Type: image/x.djvu\n");
162   if (fname)
163     fprintf(stdout,"Content-Disposition: attachment; filename=\"%s\"\n", fname);
164   fprintf(stdout,"Content-Length: %ld\n", (long)statbuf->st_size);
165   time_t tim = time(0) + 360 * 24 * 3600;
166   fprintdate(stdout, "Last-Modified: %s\n", &statbuf->st_mtime);
167   fprintdate(stdout, "Expires: %s\n", &tim);
168 }
169 
170 bool
is_djvu_file_bundled(GURL & pathurl)171 is_djvu_file_bundled(GURL &pathurl)
172 {
173   GP<ByteStream> in = ByteStream::create(pathurl,"rb");
174   GP<IFFByteStream> iff = IFFByteStream::create(in);
175   GUTF8String chkid;
176   iff->get_chunk(chkid);
177   // Make sure that this is a DjVu file.
178   if (chkid != "FORM:DJVU" &&
179       chkid != "FORM:DJVM" &&
180       chkid != "FORM:PM44" &&
181       chkid != "FORM:BM44"   )
182     G_THROW("Corrupted DjVu file");
183   // Test if it is bundled
184   if (chkid == "FORM:DJVM")
185     {
186       while (iff->get_chunk(chkid) && chkid!="DIRM")
187         iff->close_chunk();
188       if (chkid == "DIRM")
189         {
190           GP<ByteStream> dirm = iff->get_bytestream();
191           if (dirm->read8() & 0x80)
192             return true;
193         }
194     }
195   return false;
196 }
197 
198 void
djvuserver_file(GURL pathurl,bool bundled,bool download)199 djvuserver_file(GURL pathurl, bool bundled, bool download)
200 {
201   GNativeString fname = pathurl.NativeFilename();
202   struct stat statbuf;
203   if (stat((const char *)fname, &statbuf) < 0)
204     G_THROW(strerror(errno));
205 
206   // Is this a bundled file?
207   if (is_djvu_file_bundled(pathurl) && !bundled)
208     {
209       // It is bundled
210       GUTF8String id = pathurl.name();
211       fprintf(stdout,"Location: %s/index.djvu", (const char*)id);
212       if (g().querystring.length())
213         fprintf(stdout,"?%s", (const char*)g().querystring);
214       fprintf(stdout,"\n\n");
215       return;
216     }
217   // Push the file
218   if (download)
219     headers(&statbuf, pathurl.fname());
220   else
221     headers(&statbuf);
222   if (head)
223     return;
224   fprintf(stdout,"\n");
225   fflush(stdout);
226   GP<ByteStream> in = ByteStream::create(pathurl,"rb");
227   GP<ByteStream> out = ByteStream::get_stdout("ab");
228   out->copy(*in);
229 }
230 
231 void
djvuserver_directory(GURL pathurl)232 djvuserver_directory(GURL pathurl)
233 {
234   GNativeString fname = pathurl.NativeFilename();
235   struct stat statbuf;
236   if (stat((const char *)fname, &statbuf) < 0)
237     G_THROW(strerror(errno));
238   // Find the DIRM chunk directly (save time)
239   GP<ByteStream> temp;
240   GP<ByteStream> bsin = ByteStream::create(pathurl,"rb");
241   GP<DjVmDir> dir = DjVmDir::create();
242   {
243     GP<IFFByteStream> iffin = IFFByteStream::create(bsin);
244     GUTF8String chkid;
245     iffin->get_chunk(chkid);
246     if (chkid != "FORM:DJVM")
247       G_THROW( "This is not a multipage DjVu document" );
248     while (iffin->get_chunk(chkid) && chkid!="DIRM")
249       iffin->close_chunk();
250     if (chkid != "DIRM")
251       G_THROW( "This is not a new style bundled DjVu document" );
252     temp = iffin->get_bytestream();
253     dir->decode(temp);
254     if (! dir->is_bundled())
255       G_THROW( "This is not a bundled DjVu document" );
256   }
257   // Assemble index of indirect multipage file
258   GP<ByteStream> bsdir = ByteStream::create();
259   {
260     GP<IFFByteStream> iff = IFFByteStream::create(bsdir);
261     iff->put_chunk("FORM:DJVM",1);
262     iff->put_chunk("DIRM");
263     temp = iff->get_bytestream();
264     dir->encode(temp, false, false);
265     iff->close_chunk();
266     iff->close_chunk();
267   }
268   // HTTP output
269   statbuf.st_size = bsdir->tell();
270   headers(&statbuf);
271   if (head)
272     return;
273   bsdir->seek(0);
274   fprintf(stdout,"\n");
275   fflush(stdout);
276   GP<ByteStream> out = ByteStream::get_stdout("ab");
277   out->copy(*bsdir);
278 }
279 
280 void
djvuserver_component(GURL pathurl,GUTF8String id)281 djvuserver_component(GURL pathurl, GUTF8String id)
282 {
283   GNativeString fname = pathurl.NativeFilename();
284   struct stat statbuf;
285   if (stat((const char *)fname, &statbuf) < 0)
286     G_THROW(strerror(errno));
287   // Find the DIRM chunk directly (save time)
288   GP<ByteStream> temp;
289   GP<ByteStream> bsin = ByteStream::create(pathurl,"rb");
290   GP<DjVmDir> dir = DjVmDir::create();
291   {
292     GP<IFFByteStream> iffin = IFFByteStream::create(bsin);
293     GUTF8String chkid;
294     iffin->get_chunk(chkid);
295     if (chkid != "FORM:DJVM")
296       G_THROW( "This is not a multipage DjVu document" );
297     while (iffin->get_chunk(chkid) && chkid!="DIRM")
298       iffin->close_chunk();
299     if (chkid != "DIRM")
300       G_THROW( "This is not a new style bundled DjVu document" );
301     temp = iffin->get_bytestream();
302     dir->decode(temp);
303     if (! dir->is_bundled())
304       G_THROW( "This is not a bundled DjVu document" );
305   }
306   // Find the file record
307   GP<DjVmDir::File> frec = dir->id_to_file(id);
308   if (!frec)
309     G_THROW( "Cannot locate requested component file" );
310   if (!frec->size || !frec->offset)
311     G_THROW( "Corrupted DjVu directory" );
312 
313   // HTTP output
314   statbuf.st_size = frec->size + 4;
315   headers(&statbuf);
316   if (head)
317     return;
318   fprintf(stdout,"\n");
319   fflush(stdout);
320   GP<ByteStream> out = ByteStream::get_stdout("ab");
321   out->writall("AT&T", 4);
322   bsin->seek(frec->offset);
323   out->copy(*bsin, frec->size);
324 }
325 
326 
327 bool
search_cgi_arg(const char * name)328 search_cgi_arg(const char *name)
329 {
330   const char *s = g().querystring;
331   int l = strlen(name);
332   if (*s == '?')
333     s += 1;
334   while (*s)
335     {
336       if (! strncmp(s, name, l))
337         if (s[l]=='&' || s[l]=='=' || s[l]==0)
338           return true;
339       while (*s && *s != '&')
340         s += 1;
341       if (*s == '&')
342         s += 1;
343     }
344   return false;
345 }
346 
347 
348 int
main(int argc,char ** argv)349 main(int argc, char ** argv)
350 {
351   DJVU_LOCALE;
352   G_TRY
353     {
354       // Obtain path
355       bool bundled = false;
356       bool download = false;
357       if (argc == 1)
358         {
359           cgi = true;
360           g().pathinfo = GNativeString(getenv("PATH_INFO"));
361           g().pathtranslated = GNativeString(getenv("PATH_TRANSLATED"));
362           if (! g().pathinfo)
363             usage();
364           if (! g().pathtranslated)
365             G_THROW("No path information");
366           g().requestmethod = GNativeString(getenv("REQUEST_METHOD"));
367           g().querystring = GUTF8String(getenv("QUERY_STRING"));
368           if (search_cgi_arg("bundled"))
369             bundled = true;
370           if (search_cgi_arg("download") || search_cgi_arg("bundle"))
371             bundled = download = true;
372         }
373       else if (argc == 2)
374         {
375           cgi = false;
376           g().pathtranslated = GNativeString(argv[1]);
377           g().requestmethod = "GET";
378         }
379       if (! g().pathtranslated)
380         usage();
381       head = false;
382       if (g().requestmethod == "HEAD")
383         head = true;
384       else if (g().requestmethod != "GET")
385         G_THROW("Only serve HEAD and GET requests");
386       // Do it.
387       GURL pathurl = GURL::Filename::UTF8(g().pathtranslated);
388       if (pathurl.is_file())
389         {
390           djvuserver_file(pathurl, bundled, download);
391         }
392       else
393         {
394           GUTF8String id = pathurl.name();
395           pathurl = pathurl.base();
396           if (! pathurl.is_file())
397             G_THROW("File not found");
398           if (id != "index" && id != "index.djvu")
399             djvuserver_component(pathurl, id);
400           else if (bundled)
401             djvuserver_file(pathurl, bundled, download);
402           else
403             djvuserver_directory(pathurl);
404         }
405     }
406   G_CATCH(ex)
407     {
408       if (cgi)
409         {
410           GUTF8String cause = DjVuMessageLite::LookUpUTF8(ex.get_cause());
411           fprintf(stdout,"Status: 400 %s\n", (const char*)cause);
412           fprintf(stdout,"Content-Type: text/html\n\n");
413           fprintf(stdout,
414                   "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML 2.0//EN\">\n"
415                   "<HTML><HEAD><TITLE>400 Error</TITLE></HEAD><BODY>\n"
416                   "<H1>%s</H1>The requested URL '%s' cannot be processed.<P>\n"
417 #ifdef DJVULIBRE_VERSION
418                   "<HR><ADDRESS>djvuserve/DjVuLibre-" DJVULIBRE_VERSION "</ADDRESS>\n"
419 #endif
420                   "</BODY></HTML>\n",
421                   (const char *) cause,
422                   (const char *) g().pathinfo );
423         }
424       else
425         {
426           ex.perror();
427         }
428       exit(10);
429     }
430   G_ENDCATCH;
431   // return code zero
432   return 0;
433 }
434