1 //C-  -*- C++ -*-
2 //C- -------------------------------------------------------------------
3 //C- DjVuLibre-3.5
4 //C- Copyright (c) 2002  Leon Bottou and Yann Le Cun.
5 //C- Copyright (c) 2001  AT&T
6 //C-
7 //C- This software is subject to, and may be distributed under, the
8 //C- GNU General Public License, either Version 2 of the license,
9 //C- or (at your option) any later version. The license should have
10 //C- accompanied the software or you may obtain a copy of the license
11 //C- from the Free Software Foundation at http://www.fsf.org .
12 //C-
13 //C- This program is distributed in the hope that it will be useful,
14 //C- but WITHOUT ANY WARRANTY; without even the implied warranty of
15 //C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 //C- GNU General Public License for more details.
17 //C-
18 //C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library from
19 //C- Lizardtech Software.  Lizardtech Software has authorized us to
20 //C- replace the original DjVu(r) Reference Library notice by the following
21 //C- text (see doc/lizard2002.djvu and doc/lizardtech2007.djvu):
22 //C-
23 //C-  ------------------------------------------------------------------
24 //C- | DjVu (r) Reference Library (v. 3.5)
25 //C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved.
26 //C- | The DjVu Reference Library is protected by U.S. Pat. No.
27 //C- | 6,058,214 and patents pending.
28 //C- |
29 //C- | This software is subject to, and may be distributed under, the
30 //C- | GNU General Public License, either Version 2 of the license,
31 //C- | or (at your option) any later version. The license should have
32 //C- | accompanied the software or you may obtain a copy of the license
33 //C- | from the Free Software Foundation at http://www.fsf.org .
34 //C- |
35 //C- | The computer code originally released by LizardTech under this
36 //C- | license and unmodified by other parties is deemed "the LIZARDTECH
37 //C- | ORIGINAL CODE."  Subject to any third party intellectual property
38 //C- | claims, LizardTech grants recipient a worldwide, royalty-free,
39 //C- | non-exclusive license to make, use, sell, or otherwise dispose of
40 //C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the
41 //C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU
42 //C- | General Public License.   This grant only confers the right to
43 //C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to
44 //C- | the extent such infringement is reasonably necessary to enable
45 //C- | recipient to make, have made, practice, sell, or otherwise dispose
46 //C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to
47 //C- | any greater extent that may be necessary to utilize further
48 //C- | modifications or combinations.
49 //C- |
50 //C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY
51 //C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
52 //C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF
53 //C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
54 //C- +------------------------------------------------------------------
55 
56 // From: Leon Bottou, 1/31/2002
57 // This file has very little to do with my initial implementation.
58 // It has been practically rewritten by Lizardtech for i18n changes.
59 // Our original implementation consisted of multiple classes.
60 // <http://prdownloads.sourceforge.net/djvu/DjVu2_2b-src.tgz>.
61 
62 #ifdef HAVE_CONFIG_H
63 # include "config.h"
64 #endif
65 #if NEED_GNUG_PRAGMAS
66 # pragma implementation
67 #endif
68 
69 // - Author: Leon Bottou, 04/1997
70 
71 #include "DjVuGlobal.h"
72 #include "ByteStream.h"
73 #include "GOS.h"
74 #include "GURL.h"
75 #include "DjVuMessage.h"
76 #include <stddef.h>
77 #include <fcntl.h>
78 #if defined(_WIN32) || defined(__CYGWIN32__)
79 # include <io.h>
80 #endif
81 #if defined(__APPLE__)
82 # include <CoreFoundation/CFString.h>
83 #endif
84 
85 #ifdef UNIX
86 # ifndef HAS_MEMMAP
87 #  define HAS_MEMMAP 1
88 # endif
89 #endif
90 
91 #ifdef UNIX
92 # include <sys/types.h>
93 # include <sys/stat.h>
94 # include <unistd.h>
95 # include <errno.h>
96 # ifdef HAS_MEMMAP
97 #  include <sys/mman.h>
98 # endif
99 #endif
100 
101 #ifdef macintosh
102 # ifndef UNIX
103 #  include <unistd.h>
104 _MSL_IMP_EXP_C int _dup(int);
105 _MSL_IMP_EXP_C int _dup2(int,int);
106 _MSL_IMP_EXP_C int _close(int);
dup(int _a)107 __inline int dup(int _a ) { return _dup(_a);}
dup2(int _a,int _b)108 __inline int dup2(int _a, int _b ) { return _dup2(_a, _b);}
109 # endif
110 #endif
111 
112 #if defined(_WIN32) && !defined(__CYGWIN32__)
113 #  define close _close
114 #  define fdopen _fdopen
115 #  define dup _dup
116 #endif
117 
118 #ifdef HAVE_NAMESPACES
119 namespace DJVU {
120 # ifdef NOT_DEFINED // Just to fool emacs c++ mode
121 }
122 #endif
123 #endif
124 
125 const char *ByteStream::EndOfFile=ERR_MSG("EOF");
126 
127 /** ByteStream interface for stdio files.
128     The virtual member functions #read#, #write#, #tell# and #seek# are mapped
129     to the well known stdio functions #fread#, #fwrite#, #ftell# and #fseek#.
130     @see Unix man page fopen(3), fread(3), fwrite(3), ftell(3), fseek(3) */
131 
132 class ByteStream::Stdio : public ByteStream {
133 public:
134   Stdio(void);
135 
136   /** Constructs a ByteStream for accessing the file named #url#.
137       Arguments #url# and #mode# are similar to the arguments of the well
138       known stdio function #fopen#. In addition a url of #-# will be
139       interpreted as the standard output or the standard input according to
140       #mode#.  This constructor will open a stdio file and construct a
141       ByteStream object accessing this file. Destroying the ByteStream object
142       will flush and close the associated stdio file.  Returns an error code
143       if the stdio file cannot be opened. */
144   GUTF8String init(const GURL &url, const char * const mode);
145 
146   /** Constructs a ByteStream for accessing the stdio file #f#.
147       Argument #mode# indicates the type of the stdio file, as in the
148       well known stdio function #fopen#.  Destroying the ByteStream
149       object will not close the stdio file #f# unless closeme is true. */
150   GUTF8String init(FILE * const f, const char * const mode="rb", const bool closeme=false);
151 
152   /** Initializes from stdio */
153   GUTF8String init(const char mode[]);
154 
155   // Virtual functions
156   ~Stdio();
157   virtual size_t read(void *buffer, size_t size);
158   virtual size_t write(const void *buffer, size_t size);
159   virtual void flush(void);
160   virtual int seek(long offset, int whence = SEEK_SET, bool nothrow=false);
161   virtual long tell(void) const;
162 private:
163   // Cancel C++ default stuff
164   Stdio(const Stdio &);
165   Stdio & operator=(const Stdio &);
166 private:
167   // Implementation
168   bool can_read;
169   bool can_write;
170   bool must_close;
171 protected:
172   FILE *fp;
173   long pos;
174 };
175 
176 inline GUTF8String
init(FILE * const f,const char mode[],const bool closeme)177 ByteStream::Stdio::init(FILE * const f,const char mode[],const bool closeme)
178 {
179   fp=f;
180   must_close=closeme;
181   return init(mode);
182 }
183 
184 
185 /** ByteStream interface managing a memory buffer.
186     Class #ByteStream::Memory# manages a dynamically resizable buffer from
187     which data can be read or written.  The buffer itself is organized as an
188     array of blocks of 4096 bytes.  */
189 
190 class ByteStream::Memory : public ByteStream
191 {
192 public:
193   /** Constructs an empty ByteStream::Memory.
194       The buffer is initially empty. You must first use function #write#
195       to store data into the buffer, use function #seek# to rewind the
196       current position, and function #read# to read the data back. */
197   Memory();
198   /** Constructs a Memory by copying initial data.  The
199       Memory buffer is initialized with #size# bytes copied from the
200       memory area pointed to by #buffer#. */
201   GUTF8String init(const void * const buffer, const size_t size);
202   // Virtual functions
203   ~Memory();
204   virtual size_t read(void *buffer, size_t size);
205   virtual size_t write(const void *buffer, size_t size);
206   virtual int    seek(long offset, int whence=SEEK_SET, bool nothrow=false);
207   virtual long   tell(void) const;
208   /** Erases everything in the Memory.
209       The current location is reset to zero. */
210   void empty();
211   /** Returns the total number of bytes contained in the buffer.  Valid
212       offsets for function #seek# range from 0 to the value returned by this
213       function. */
214   virtual long size(void) const;
215   /** Returns a reference to the byte at offset #n#. This reference can be
216       used to read (as in #mbs[n]#) or modify (as in #mbs[n]=c#) the contents
217       of the buffer. */
218   char &operator[] (int n);
219   char &operator[] (long n);
220   /** Copies all internal data into \Ref{TArray} and returns it */
221 private:
222   // Cancel C++ default stuff
223   Memory(const Memory &);
224   Memory & operator=(const Memory &);
225   // Current position
226   long where;
227 protected:
228   /** Reads data from a random position. This function reads at most #sz#
229       bytes at position #pos# into #buffer# and returns the actual number of
230       bytes read.  The current position is unchanged. */
231   virtual size_t readat(void *buffer, size_t sz, long pos);
232   /** Number of bytes in internal buffer. */
233   long bsize;
234   /** Number of 4096 bytes blocks. */
235   int nblocks;
236   /** Pointers (possibly null) to 4096 bytes blocks. */
237   char **blocks;
238   /** Pointers (possibly null) to 4096 bytes blocks. */
239   GPBuffer<char *> gblocks;
240 };
241 
242 
243 
244 inline long
size(void) const245 ByteStream::Memory::size(void) const
246 {
247   return bsize;
248 }
249 
250 inline char &
operator [](int n)251 ByteStream::Memory::operator[] (int n)
252 {
253   return blocks[n>>12][n&0xfff];
254 }
255 
256 inline char &
operator [](long n)257 ByteStream::Memory::operator[] (long n)
258 {
259   return blocks[n>>12][n&0xfff];
260 }
261 
262 
263 
264 /** Read-only ByteStream interface to a memory area.
265     Class #ByteStream::Static# implements a read-only ByteStream interface for a
266     memory area specified by the user at construction time. Calls to function
267     #read# directly access this memory area.  The user must therefore make
268     sure that its content remain valid long enough.  */
269 
270 class ByteStream::Static : public ByteStream
271 {
272 public:
273 
274   /** Creates a Static object for allocating the memory area of
275       length #sz# starting at address #buffer#. */
276   Static(const void * const buffer, const size_t sz);
277   ~Static();
278   // Virtual functions
279   virtual size_t read(void *buffer, size_t sz);
280   virtual int    seek(long offset, int whence = SEEK_SET, bool nothrow=false);
281   virtual long tell(void) const;
282   /** Returns the total number of bytes contained in the buffer, file, etc.
283       Valid offsets for function #seek# range from 0 to the value returned
284       by this function. */
285   virtual long size(void) const;
286 protected:
287   const char *data;
288   long bsize;
289 private:
290   long where;
291 };
292 
~Static()293 ByteStream::Static::~Static() {}
294 
295 inline long
size(void) const296 ByteStream::Static::size(void) const
297 {
298   return bsize;
299 }
300 
#if HAS_MEMMAP
/** Read-only ByteStream over a memory mapped file.
    Class #MemoryMapByteStream# reuses the reading logic of
    #ByteStream::Static#.  Only class #ByteStream# (declared as a friend)
    can call the private #init# functions. */

class MemoryMapByteStream : public ByteStream::Static
{
public:
  MemoryMapByteStream(void);
  virtual ~MemoryMapByteStream();
private:
  // Initialization from a file descriptor or from a stdio file.
  // Both return a message string; callers treat a non-empty one as failure.
  GUTF8String init(const int fd, const bool closeme);
  GUTF8String init(FILE *const f,const bool closeme);
  friend class ByteStream;
};
#endif
317 
318 //// CLASS BYTESTREAM
319 
320 
~ByteStream()321 ByteStream::~ByteStream()
322 {
323 }
324 
325 int
scanf(const char * fmt,...)326 ByteStream::scanf(const char *fmt, ...)
327 {
328   G_THROW( ERR_MSG("ByteStream.not_implemented") ); // This is a place holder function.
329   return 0;
330 }
331 
332 size_t
read(void * buffer,size_t sz)333 ByteStream::read(void *buffer, size_t sz)
334 {
335   G_THROW( ERR_MSG("ByteStream.cant_read") );      //  Cannot read from a ByteStream created for writing
336   return 0;
337 }
338 
339 size_t
write(const void * buffer,size_t sz)340 ByteStream::write(const void *buffer, size_t sz)
341 {
342   G_THROW( ERR_MSG("ByteStream.cant_write") );      //  Cannot write from a ByteStream created for reading
343   return 0;
344 }
345 
346 void
flush()347 ByteStream::flush()
348 {
349 }
350 
351 int
seek(long offset,int whence,bool nothrow)352 ByteStream::seek(long offset, int whence, bool nothrow)
353 {
354   long nwhere = 0;
355   long ncurrent = tell();
356   switch (whence)
357     {
358     case SEEK_SET:
359       nwhere=0; break;
360     case SEEK_CUR:
361       nwhere=ncurrent; break;
362     case SEEK_END:
363       {
364         if(offset)
365           {
366             if (nothrow)
367               return -1;
368             G_THROW( ERR_MSG("ByteStream.backward") );
369           }
370         char buffer[1024];
371         int bytes;
372         while((bytes=read(buffer, sizeof(buffer))))
373           EMPTY_LOOP;
374         return 0;
375       }
376     default:
377       G_THROW( ERR_MSG("ByteStream.bad_arg") );       //  Illegal argument in seek
378     }
379   nwhere += offset;
380   if (nwhere < ncurrent)
381     {
382       //  Seeking backwards is not supported by this ByteStream
383       if (nothrow)
384         return -1;
385       G_THROW( ERR_MSG("ByteStream.backward") );
386     }
387   while (nwhere > ncurrent)
388     {
389       char buffer[1024];
390       long xbytes = nwhere - ncurrent;
391       if (xbytes > (long)sizeof(buffer))
392         xbytes = sizeof(buffer);
393       long bytes = (long)read(buffer, xbytes);
394       ncurrent += bytes;
395       if (!bytes)
396         G_THROW( ByteStream::EndOfFile );
397       //  Seeking works funny on this ByteStream (ftell() acts strange)
398       if (ncurrent != tell())
399         G_THROW( ERR_MSG("ByteStream.seek") );
400     }
401   return 0;
402 }
403 
404 size_t
readall(void * buffer,size_t size)405 ByteStream::readall(void *buffer, size_t size)
406 {
407   size_t total = 0;
408   while (size > 0)
409     {
410       int nitems = read(buffer, size);
411       // Replaced perror() below with G_THROW(). It still makes little sense
412       // as there is no guarantee, that errno is right. Still, throwing
413       // exception instead of continuing to loop is better.
414       // - eaf
415       if(nitems < 0)
416         G_THROW(strerror(errno));               //  (No error in the DjVuMessageFile)
417       if (nitems == 0)
418         break;
419       total += nitems;
420       size -= nitems;
421       buffer = (void*)((char*)buffer + nitems);
422     }
423   return total;
424 }
425 
426 size_t
format(const char * fmt,...)427 ByteStream::format(const char *fmt, ... )
428 {
429   va_list args;
430   va_start(args, fmt);
431   const GUTF8String message(fmt,args);
432   return writestring(message);
433 }
434 
435 size_t
writestring(const GNativeString & s)436 ByteStream::writestring(const GNativeString &s)
437 {
438   int retval;
439   if(cp != UTF8)
440   {
441     retval=writall((const char *)s,s.length());
442     if(cp == AUTO)
443       cp=NATIVE; // Avoid mixing string types.
444   }else
445   {
446     const GUTF8String msg(s.getNative2UTF8());
447     retval=writall((const char *)msg,msg.length());
448   }
449   return retval;
450 }
451 
452 size_t
writestring(const GUTF8String & s)453 ByteStream::writestring(const GUTF8String &s)
454 {
455   int retval;
456   if(cp != NATIVE)
457   {
458     retval=writall((const char *)s,s.length());
459     if(cp == AUTO)
460       cp=UTF8; // Avoid mixing string types.
461   }else
462   {
463     const GNativeString msg(s.getUTF82Native());
464     retval=writall((const char *)msg,msg.length());
465   }
466   return retval;
467 }
468 
469 size_t
writall(const void * buffer,size_t size)470 ByteStream::writall(const void *buffer, size_t size)
471 {
472   size_t total = 0;
473   while (size > 0)
474     {
475       size_t nitems = write(buffer, size);
476       if (nitems == 0)
477         G_THROW( ERR_MSG("ByteStream.write_error") );      //  Unknown error in write
478       total += nitems;
479       size -= nitems;
480       buffer = (void*)((char*)buffer + nitems);
481     }
482   return total;
483 }
484 
485 size_t
copy(ByteStream & bsfrom,size_t size)486 ByteStream::copy(ByteStream &bsfrom, size_t size)
487 {
488   size_t total = 0;
489   const size_t max_buffer_size=200*1024;
490   const size_t buffer_size=(size>0 && size<max_buffer_size)?size:max_buffer_size;
491   char *buffer;
492   GPBuffer<char> gbuf(buffer,buffer_size);
493   for(;;)
494     {
495       size_t bytes = buffer_size;
496       if (size>0 && bytes+total>size)
497         bytes = size - total;
498       if (bytes == 0)
499         break;
500       bytes = bsfrom.read((void*)buffer, bytes);
501       if (bytes == 0)
502         break;
503       writall((void*)buffer, bytes);
504       total += bytes;
505     }
506   return total;
507 }
508 
509 
510 void
write8(unsigned int card)511 ByteStream::write8 (unsigned int card)
512 {
513   unsigned char c[1];
514   c[0] = (card) & 0xff;
515   if (write((void*)c, sizeof(c)) != sizeof(c))
516     G_THROW(strerror(errno));   //  (No error in the DjVuMessageFile)
517 }
518 
519 void
write16(unsigned int card)520 ByteStream::write16(unsigned int card)
521 {
522   unsigned char c[2];
523   c[0] = (card>>8) & 0xff;
524   c[1] = (card) & 0xff;
525   if (writall((void*)c, sizeof(c)) != sizeof(c))
526     G_THROW(strerror(errno));   //  (No error in the DjVuMessageFile)
527 }
528 
529 void
write24(unsigned int card)530 ByteStream::write24(unsigned int card)
531 {
532   unsigned char c[3];
533   c[0] = (card>>16) & 0xff;
534   c[1] = (card>>8) & 0xff;
535   c[2] = (card) & 0xff;
536   if (writall((void*)c, sizeof(c)) != sizeof(c))
537     G_THROW(strerror(errno));   //  (No error in the DjVuMessageFile)
538 }
539 
540 void
write32(unsigned int card)541 ByteStream::write32(unsigned int card)
542 {
543   unsigned char c[4];
544   c[0] = (card>>24) & 0xff;
545   c[1] = (card>>16) & 0xff;
546   c[2] = (card>>8) & 0xff;
547   c[3] = (card) & 0xff;
548   if (writall((void*)c, sizeof(c)) != sizeof(c))
549     G_THROW(strerror(errno));   //  (No error in the DjVuMessageFile)
550 }
551 
552 unsigned int
read8()553 ByteStream::read8 ()
554 {
555   unsigned char c[1];
556   if (readall((void*)c, sizeof(c)) != sizeof(c))
557     G_THROW( ByteStream::EndOfFile );
558   return c[0];
559 }
560 
561 unsigned int
read16()562 ByteStream::read16()
563 {
564   unsigned char c[2];
565   if (readall((void*)c, sizeof(c)) != sizeof(c))
566     G_THROW( ByteStream::EndOfFile );
567   return (c[0]<<8)+c[1];
568 }
569 
570 unsigned int
read24()571 ByteStream::read24()
572 {
573   unsigned char c[3];
574   if (readall((void*)c, sizeof(c)) != sizeof(c))
575     G_THROW( ByteStream::EndOfFile );
576   return (((c[0]<<8)+c[1])<<8)+c[2];
577 }
578 
579 unsigned int
read32()580 ByteStream::read32()
581 {
582   unsigned char c[4];
583   if (readall((void*)c, sizeof(c)) != sizeof(c))
584     G_THROW( ByteStream::EndOfFile );
585   return (((((c[0]<<8)+c[1])<<8)+c[2])<<8)+c[3];
586 }
587 
588 
589 
590 //// CLASS ByteStream::Stdio
591 
Stdio(void)592 ByteStream::Stdio::Stdio(void)
593 : can_read(false),can_write(false),must_close(true),fp(0),pos(0)
594 {}
595 
~Stdio()596 ByteStream::Stdio::~Stdio()
597 {
598   if (fp && must_close)
599     fclose(fp);
600 }
601 
602 GUTF8String
init(const char mode[])603 ByteStream::Stdio::init(const char mode[])
604 {
605   char const *mesg=0;
606   bool binary=false;
607   if(!fp)
608     must_close=false;
609   for (const char *s=mode; s && *s; s++)
610   {
611     switch(*s)
612     {
613       case 'r':
614         can_read=true;
615         if(!fp) fp=stdin;
616         break;
617       case 'w':
618       case 'a':
619         can_write=true;
620         if(!fp) fp=stdout;
621         break;
622       case '+':
623         can_read=can_write=true;
624         break;
625       case 'b':
626         binary=true;
627         break;
628       default:
629         mesg= ERR_MSG("ByteStream.bad_mode"); //  Illegal mode in Stdio
630     }
631   }
632   if(binary && fp) {
633 #if defined(__CYGWIN32__)
634     setmode(fileno(fp), O_BINARY);
635 #elif defined(_WIN32)
636     _setmode(_fileno(fp), _O_BINARY);
637 #endif
638   }
639   GUTF8String retval;
640   if(!mesg)
641   {
642     tell();
643   }else
644   {
645     retval=mesg;
646   }
647   if(mesg &&(fp && must_close))
648   {
649     fclose(fp);
650     fp=0;
651     must_close=false;
652   }
653   return retval;
654 }
655 
#ifdef _WIN32
/** Converts the UTF-8 string #cstr# into a wide string allocated with
    #new[]#.  The caller must release the result with #delete[]#.
    Returns null when #ncopy# reports that nothing was converted. */
static wchar_t *
utf8_to_wide(const char *cstr)
{
  // One wide character per input byte, plus the terminator, always suffices.
  const int wlen = strlen(cstr) + 1;
  wchar_t * const wstr = new wchar_t[wlen];
  if (GUTF8String(cstr).ncopy(wstr, wlen) <= 0)
    {
      delete [] wstr;
      return 0;
    }
  return wstr;
}
#endif
668 
#ifdef __APPLE__
/** Returns the file system representation of the UTF-8 string #cstr#
    in a buffer allocated with #new[]#; the caller must release it with
    #delete[]#.  When Core Foundation cannot perform the conversion,
    the result is a plain copy of #cstr#. */
static char *
utf8_to_utf8mac(const char *cstr)
{
  const int len = strlen(cstr);
  // The creation can fail (null result), e.g. for bytes that are not
  // valid UTF-8; the CF calls below must not receive a null string.
  CFStringRef utf8 = CFStringCreateWithCString(NULL, cstr, kCFStringEncodingUTF8);
  int buflen = len+1;
  if (utf8)
    {
      const int fslen = CFStringGetMaximumSizeOfFileSystemRepresentation(utf8);
      if (fslen > buflen)
        buflen = fslen;
    }
  char *nfdstr = new char[buflen];
  if (!utf8 || !CFStringGetFileSystemRepresentation(utf8, nfdstr, buflen))
    strcpy(nfdstr, cstr);
  // Objects obtained from a Create function are owned by the caller.
  if (utf8)
    CFRelease(utf8);
  return nfdstr;
}
#endif
683 
684 
685 static FILE *
urlfopen(const GURL & url,const char mode[])686 urlfopen(const GURL &url,const char mode[])
687 {
688   FILE *retval = 0;
689 #if defined(_WIN32)
690   // On Win, try to use _wfopen instead of fopen
691   wchar_t *wstr = utf8_to_wide((const char*)url.UTF8Filename());
692   wchar_t *wmode = utf8_to_wide(mode);
693   if (wstr && wmode)
694     retval = _wfopen(wstr, wmode);
695   delete [] wstr;
696   delete [] wmode;
697   if (! retval)
698     retval = fopen((const char *)url.NativeFilename(),mode);
699 #elif defined(__APPLE__)
700   // On Mac, prefer the NFD version of the UTF8 filename
701   const char *cnfd = utf8_to_utf8mac((const char*)url.UTF8Filename());
702   retval = fopen(cnfd, mode);
703   delete [] cnfd;
704   if (! retval) // Otherwise try unnormalized UTF8
705     retval = fopen((const char*)url.UTF8Filename(), mode);
706 #else
707   // Unix filesystems are usually in native encoding
708   retval = fopen((const char *)url.NativeFilename(),mode);
709   if (! retval)
710     retval = fopen((const char *)url.UTF8Filename(),mode);
711 #endif
712   return retval;
713 }
714 
#ifdef UNIX
/** File descriptor counterpart of #urlfopen#: calls #open# on the file
    designated by #url# with flags #mode# and permissions #perm#, trying
    the file name encodings suited to the platform.
    Returns a negative value when every attempt fails. */
static int
urlopen(const GURL &url, const int mode, const int perm)
{
  int fd = -1;
#if defined(__APPLE__)
  // Prefer the file system representation of the UTF8 filename
  const char *nfdname = utf8_to_utf8mac((const char*)url.UTF8Filename());
  fd = open(nfdname, mode, perm);
  delete [] nfdname;
  if (fd >= 0)
    return fd;
  fd = open((const char*)url.UTF8Filename(), mode, perm);
#else
  // Native encoding first, then UTF8
  fd = open((const char *)url.NativeFilename(),mode,perm);
  if (fd >= 0)
    return fd;
  fd = open((const char *)url.UTF8Filename(),mode,perm);
#endif
  return fd;
}
#endif /* UNIX */
736 
737 GUTF8String
init(const GURL & url,const char mode[])738 ByteStream::Stdio::init(const GURL &url, const char mode[])
739 {
740   GUTF8String retval;
741   if (url.fname() != "-")
742   {
743     fp = urlfopen(url,mode);
744     if (!fp)
745     {
746       //  Failed to open '%s': %s
747       G_THROW( ERR_MSG("ByteStream.open_fail") "\t" + url.name()
748                +"\t"+GNativeString(strerror(errno)).getNative2UTF8());
749     }
750   }
751   return retval.length()?retval:init(mode);
752 }
753 
754 size_t
read(void * buffer,size_t size)755 ByteStream::Stdio::read(void *buffer, size_t size)
756 {
757   if (!can_read)
758     G_THROW( ERR_MSG("ByteStream.no_read") ); //  Stdio not opened for reading
759   size_t nitems;
760   do
761   {
762     clearerr(fp);
763     nitems = fread(buffer, 1, size, fp);
764     if (nitems<=0 && ferror(fp))
765     {
766 #ifdef EINTR
767       if (errno!=EINTR)
768 #endif
769         G_THROW(strerror(errno)); //  (No error in the DjVuMessageFile)
770     }
771     else
772       break;
773   } while(true);
774   pos += nitems;
775   return nitems;
776 }
777 
778 size_t
write(const void * buffer,size_t size)779 ByteStream::Stdio::write(const void *buffer, size_t size)
780 {
781   if (!can_write)
782     G_THROW( ERR_MSG("ByteStream.no_write") ); //  Stdio not opened for writing
783   size_t nitems;
784   do
785   {
786     clearerr(fp);
787     nitems = fwrite(buffer, 1, size, fp);
788     if (nitems<=0 && ferror(fp))
789     {
790 #ifdef EINTR
791       if (errno!=EINTR)
792 #endif
793         G_THROW(strerror(errno)); //  (No error in the DjVuMessageFile)
794     }
795     else
796       break;
797   } while(true);
798   pos += nitems;
799   return nitems;
800 }
801 
802 void
flush()803 ByteStream::Stdio::flush()
804 {
805   if (fflush(fp) < 0)
806     G_THROW(strerror(errno)); //  (No error in the DjVuMessageFile)
807 }
808 
809 long
tell(void) const810 ByteStream::Stdio::tell(void) const
811 {
812   long x = ftell(fp);
813   if (x >= 0)
814   {
815     Stdio *sbs=const_cast<Stdio *>(this);
816     (sbs->pos) = x;
817   }else
818   {
819     x=pos;
820   }
821   return x;
822 }
823 
824 int
seek(long offset,int whence,bool nothrow)825 ByteStream::Stdio::seek(long offset, int whence, bool nothrow)
826 {
827   if (whence==SEEK_SET && offset>=0 && offset==ftell(fp))
828     return 0;
829   clearerr(fp);
830   if (fseek(fp, offset, whence))
831     {
832       if (nothrow)
833         return -1;
834       G_THROW(strerror(errno)); //  (No error in the DjVuMessageFile)
835     }
836   return tell();
837 }
838 
839 
840 
841 
842 ///////// ByteStream::Memory
843 
Memory()844 ByteStream::Memory::Memory()
845   : where(0), bsize(0), nblocks(0), gblocks(blocks,0)
846 {
847 }
848 
849 GUTF8String
init(void const * const buffer,const size_t sz)850 ByteStream::Memory::init(void const * const buffer, const size_t sz)
851 {
852   GUTF8String retval;
853   G_TRY
854   {
855     writall(buffer, sz);
856     where = 0;
857   }
858   G_CATCH(ex) // The only error that should be thrown is out of memory...
859   {
860     retval=ex.get_cause();
861   }
862   G_ENDCATCH;
863   return retval;
864 }
865 
866 void
empty()867 ByteStream::Memory::empty()
868 {
869   for (int b=0; b<nblocks; b++)
870   {
871     delete [] blocks[b];
872     blocks[b]=0;
873   }
874   bsize = 0;
875   where = 0;
876   nblocks = 0;
877 }
878 
~Memory()879 ByteStream::Memory::~Memory()
880 {
881   empty();
882 }
883 
884 size_t
write(const void * buffer,size_t sz)885 ByteStream::Memory::write(const void *buffer, size_t sz)
886 {
887   long nsz = (long)sz;
888   if (nsz <= 0)
889     return 0;
890   // check memory
891   if ( (where+nsz) > ((bsize+0xfff)&~0xfff) )
892     {
893       // reallocate pointer array
894       if ( (where+nsz) > (nblocks<<12) )
895         {
896           const long old_nblocks=nblocks;
897           nblocks = (((where+nsz)+0xffff)&~0xffff) >> 12;
898           gblocks.resize(nblocks);
899           char const ** eblocks=(char const **)(blocks+old_nblocks);
900           for(char const * const * const new_eblocks=blocks+nblocks;
901               eblocks <new_eblocks; eblocks++)
902             {
903               *eblocks = 0;
904             }
905         }
906       // allocate blocks
907       for (long b=(where>>12); (b<<12)<(where+nsz); b++)
908         {
909           if (! blocks[b])
910             blocks[b] = new char[0x1000];
911         }
912     }
913   // write data to buffer
914   while (nsz > 0)
915     {
916       long n = (where|0xfff) + 1 - where;
917       n = ((nsz < n) ? nsz : n);
918       memcpy( (void*)&blocks[where>>12][where&0xfff], buffer, (size_t)n);
919       buffer = (void*) ((char*)buffer + n);
920       where += n;
921       nsz -= n;
922     }
923   // adjust size
924   if (where > bsize)
925     bsize = where;
926   return sz;
927 }
928 
929 size_t
readat(void * buffer,size_t sz,long pos)930 ByteStream::Memory::readat(void *buffer, size_t sz, long pos)
931 {
932   if ((long)sz > bsize - pos)
933     sz = (size_t)(bsize - pos);
934   long nsz = (long)sz;
935   if (nsz <= 0)
936     return 0;
937   // read data from buffer
938   while (nsz > 0)
939     {
940       long n = (pos|0xfff) + 1 - pos;
941       n = ((nsz < n) ? nsz : n);
942       memcpy(buffer, (void*)&blocks[pos>>12][pos&0xfff], (size_t)n);
943       buffer = (void*) ((char*)buffer + n);
944       pos += n;
945       nsz -= n;
946     }
947   return sz;
948 }
949 
950 size_t
read(void * buffer,size_t sz)951 ByteStream::Memory::read(void *buffer, size_t sz)
952 {
953   sz = readat(buffer,sz,where);
954   where += sz;
955   return sz;
956 }
957 
958 long
tell(void) const959 ByteStream::Memory::tell(void) const
960 {
961   return where;
962 }
963 
964 int
seek(long offset,int whence,bool nothrow)965 ByteStream::Memory::seek(long offset, int whence, bool nothrow)
966 {
967   long nwhere = 0;
968   switch (whence)
969     {
970     case SEEK_SET: nwhere = 0; break;
971     case SEEK_CUR: nwhere = where; break;
972     case SEEK_END: nwhere = bsize; break;
973     default: G_THROW( ERR_MSG("bad_arg") "\tByteStream::Memory::seek()");
974     }
975   nwhere += offset;
976   if (nwhere<0)
977     G_THROW( ERR_MSG("ByteStream.seek_error2") );
978   where = nwhere;
979   return 0;
980 }
981 
982 
983 
/** Disabled copy of #get_data#.
    The function lives in Arrays.cpp so that ByteStream.o does not
    depend on Arrays.o; it is only compiled here when the macro
    below is defined. */
#ifdef DO_NOT_MOVE_GET_DATA_TO_ARRAYS_CPP
TArray<char>
ByteStream::get_data(void)
{
   // Array indexed from 0 to size()-1, filled from the start of the stream.
   TArray<char> data(0, size()-1);
   readat((char*)data, size(), 0);
   return data;
}
#endif
996 
997 
998 ///////// ByteStream::Static
999 
Static(const void * const buffer,const size_t sz)1000 ByteStream::Static::Static(const void * const buffer, const size_t sz)
1001   : data((const char *)buffer), bsize(sz), where(0)
1002 {
1003 }
1004 
1005 size_t
read(void * buffer,size_t sz)1006 ByteStream::Static::read(void *buffer, size_t sz)
1007 {
1008   long nsz = (long)sz;
1009   if (nsz > bsize - where)
1010     nsz = bsize - where;
1011   if (nsz <= 0)
1012     return 0;
1013   memcpy(buffer, data+where, nsz);
1014   where += nsz;
1015   return nsz;
1016 }
1017 
1018 int
seek(long offset,int whence,bool nothrow)1019 ByteStream::Static::seek(long offset, int whence, bool nothrow)
1020 {
1021   long nwhere = 0;
1022   switch (whence)
1023     {
1024     case SEEK_SET: nwhere = 0; break;
1025     case SEEK_CUR: nwhere = where; break;
1026     case SEEK_END: nwhere = bsize; break;
1027     default: G_THROW("bad_arg\tByteStream::Static::seek()");
1028       //  Illegal argument to ByteStream::Static::seek()
1029     }
1030   nwhere += offset;
1031   if (nwhere<0)
1032     G_THROW( ERR_MSG("ByteStream.seek_error2") );
1033   //  Attempt to seek before the beginning of the file
1034   where = nwhere;
1035   return 0;
1036 }
1037 
1038 long
tell(void) const1039 ByteStream::Static::tell(void) const
1040 {
1041   return where;
1042 }
1043 
1044 GP<ByteStream>
create(void)1045 ByteStream::create(void)
1046 {
1047   return new Memory();
1048 }
1049 
1050 GP<ByteStream>
create(void const * const buffer,const size_t size)1051 ByteStream::create(void const * const buffer, const size_t size)
1052 {
1053   Memory *mbs=new Memory();
1054   GP<ByteStream> retval=mbs;
1055   mbs->init(buffer,size);
1056   return retval;
1057 }
1058 
1059 GP<ByteStream>
create(const GURL & url,char const * const xmode)1060 ByteStream::create(const GURL &url,char const * const xmode)
1061 {
1062   GP<ByteStream> retval;
1063   const char *mode = ((xmode) ? xmode : "rb");
1064 #ifdef UNIX
1065   if (!strcmp(mode,"rb"))
1066     {
1067       int fd = urlopen(url,O_RDONLY,0777);
1068       if (fd >= 0)
1069         {
1070 #if HAS_MEMMAP && defined(S_IFREG)
1071           struct stat buf;
1072           if ( (fstat(fd, &buf) >= 0) && (buf.st_mode & S_IFREG) )
1073             {
1074               MemoryMapByteStream *rb = new MemoryMapByteStream();
1075               retval = rb;
1076               GUTF8String errmessage = rb->init(fd,true);
1077               if(errmessage.length())
1078                 retval=0;
1079             }
1080 #endif
1081           if (! retval)
1082             {
1083               FILE *f = fdopen(fd, mode);
1084               if (f)
1085                 {
1086                   Stdio *sbs=new Stdio();
1087                   retval=sbs;
1088                   GUTF8String errmessage=sbs->init(f, mode, true);
1089                   if(errmessage.length())
1090                     retval=0;
1091                 }
1092             }
1093           if (! retval)
1094             close(fd);
1095         }
1096     }
1097 #endif
1098   if (! retval)
1099     {
1100       Stdio *sbs=new Stdio();
1101       retval=sbs;
1102       GUTF8String errmessage=sbs->init(url, mode);
1103       if(errmessage.length())
1104         G_THROW(errmessage);
1105     }
1106   return retval;
1107 }
1108 
1109 GP<ByteStream>
create(char const * const mode)1110 ByteStream::create(char const * const mode)
1111 {
1112   GP<ByteStream> retval;
1113   Stdio *sbs=new Stdio();
1114   retval=sbs;
1115   GUTF8String errmessage=sbs->init(mode?mode:"rb");
1116   if(errmessage.length())
1117   {
1118     G_THROW(errmessage);
1119   }
1120   return retval;
1121 }
1122 
1123 GP<ByteStream>
create(const int fd,char const * const mode,const bool closeme)1124 ByteStream::create(const int fd,char const * const mode,const bool closeme)
1125 {
1126   GP<ByteStream> retval;
1127   const char *default_mode="rb";
1128 #if HAS_MEMMAP
1129   if (   (!mode&&(fd!=0)&&(fd!=1)&&(fd!=2))
1130       || (mode&&(GUTF8String("rb") == mode)))
1131   {
1132     MemoryMapByteStream *rb=new MemoryMapByteStream();
1133     retval=rb;
1134     GUTF8String errmessage=rb->init(fd,closeme);
1135     if(errmessage.length())
1136     {
1137       retval=0;
1138     }
1139   }
1140   if(!retval)
1141 #endif
1142   {
1143     int fd2 = fd;
1144     FILE *f = 0;
1145     if (fd == 0 && !closeme
1146         && (!mode || mode[0]=='r') )
1147       {
1148         f=stdin;
1149         default_mode = "r";
1150         fd2=(-1);
1151       }
1152     else if (fd == 1 && !closeme
1153              && (!mode || mode[0]=='a' || mode[0]=='w') )
1154       {
1155         default_mode = "a";
1156         f=stdout;
1157         fd2 = -1;
1158       }
1159     else if (fd == 2 && !closeme
1160              && (!mode || mode[0]=='a' || mode[0]=='w') )
1161       {
1162         default_mode = "a";
1163         f=stderr;
1164         fd2 = -1;
1165       }
1166     else
1167       {
1168         if (! closeme)
1169           fd2 = dup(fd);
1170         f = fdopen(fd2,(char*)(mode?mode:default_mode));
1171       }
1172 
1173     if(!f)
1174       {
1175         if ( fd2 >= 0)
1176           close(fd2);
1177         G_THROW( ERR_MSG("ByteStream.open_fail2") );
1178       }
1179     Stdio *sbs=new Stdio();
1180     retval=sbs;
1181     GUTF8String errmessage=sbs->init(f,mode?mode:default_mode,(fd2>=0));
1182     if(errmessage.length())
1183       G_THROW(errmessage);
1184   }
1185   return retval;
1186 }
1187 
1188 GP<ByteStream>
create(FILE * const f,char const * const mode,const bool closeme)1189 ByteStream::create(FILE * const f,char const * const mode,const bool closeme)
1190 {
1191   GP<ByteStream> retval;
1192 #if HAS_MEMMAP
1193   if (!mode || (GUTF8String("rb") == mode))
1194   {
1195     MemoryMapByteStream *rb=new MemoryMapByteStream();
1196     retval=rb;
1197     GUTF8String errmessage=rb->init(fileno(f),false);
1198     if(errmessage.length())
1199     {
1200       retval=0;
1201     }else
1202     {
1203       fclose(f);
1204     }
1205   }
1206   if(!retval)
1207 #endif
1208   {
1209     Stdio *sbs=new Stdio();
1210     retval=sbs;
1211     GUTF8String errmessage=sbs->init(f,mode?mode:"rb",closeme);
1212     if(errmessage.length())
1213     {
1214       G_THROW(errmessage);
1215     }
1216   }
1217   return retval;
1218 }
1219 
1220 GP<ByteStream>
create_static(const void * buffer,size_t sz)1221 ByteStream::create_static(const void * buffer, size_t sz)
1222 {
1223   return new Static(buffer, sz);
1224 }
1225 
1226 #if HAS_MEMMAP
MemoryMapByteStream(void)1227 MemoryMapByteStream::MemoryMapByteStream(void)
1228 : ByteStream::Static(0,0)
1229 {}
1230 
1231 GUTF8String
init(FILE * const f,const bool closeme)1232 MemoryMapByteStream::init(FILE *const f,const bool closeme)
1233 {
1234   GUTF8String retval;
1235   retval=init(fileno(f),false);
1236   if(closeme)
1237   {
1238     fclose(f);
1239   }
1240   return retval;
1241 }
1242 
1243 GUTF8String
init(const int fd,const bool closeme)1244 MemoryMapByteStream::init(const int fd,const bool closeme)
1245 {
1246   GUTF8String retval;
1247   data = (char*)(-1);
1248 #if defined(PROT_READ) && defined(MAP_SHARED)
1249   struct stat statbuf;
1250   if(!fstat(fd,&statbuf) && statbuf.st_size)
1251     {
1252       bsize=statbuf.st_size;
1253       data=(char *)mmap(0,statbuf.st_size,PROT_READ,MAP_SHARED,fd,0);
1254     }
1255 #endif
1256   if(data == (char *)(-1))
1257     retval = ERR_MSG("ByteStream.open_fail2");
1258   if(closeme)
1259     close(fd);
1260   return retval;
1261 }
1262 
~MemoryMapByteStream()1263 MemoryMapByteStream::~MemoryMapByteStream()
1264 {
1265   if(data)
1266   {
1267     munmap(const_cast<char *>(data),bsize);
1268   }
1269 }
1270 
1271 #endif
1272 
~Wrapper()1273 ByteStream::Wrapper::~Wrapper() {}
1274 
1275 
1276 GP<ByteStream>
get_stdin(char const * mode)1277 ByteStream::get_stdin(char const *mode)
1278 {
1279   static GP<ByteStream> gp = ByteStream::create(0,mode,false);
1280   return gp;
1281 }
1282 
1283 GP<ByteStream>
get_stdout(char const * mode)1284 ByteStream::get_stdout(char const *mode)
1285 {
1286   static GP<ByteStream> gp = ByteStream::create(1,mode,false);
1287   return gp;
1288 }
1289 
1290 GP<ByteStream>
get_stderr(char const * mode)1291 ByteStream::get_stderr(char const *mode)
1292 {
1293   static GP<ByteStream> gp = ByteStream::create(2,mode,false);
1294   return gp;
1295 }
1296 
1297 
1298 /** Looks up the message and writes it to the specified stream. */
formatmessage(const char * fmt,...)1299 void ByteStream::formatmessage( const char *fmt, ... )
1300 {
1301   va_list args;
1302   va_start(args, fmt);
1303   const GUTF8String message(fmt,args);
1304   writemessage( message );
1305 }
1306 
1307 /** Looks up the message and writes it to the specified stream. */
writemessage(const char * message)1308 void ByteStream::writemessage( const char *message )
1309 {
1310   writestring( DjVuMessage::LookUpUTF8( message ) );
1311 }
1312 
1313 static void
read_file(ByteStream & bs,char * & buffer,GPBuffer<char> & gbuffer)1314 read_file(ByteStream &bs,char *&buffer,GPBuffer<char> &gbuffer)
1315 {
1316   const int size=bs.size();
1317   int pos=0;
1318   if(size>0)
1319   {
1320     size_t readsize=size+1;
1321     gbuffer.resize(readsize);
1322     for(int i;readsize&&(i=bs.read(buffer+pos,readsize))>0;pos+=i,readsize-=i)
1323       EMPTY_LOOP;
1324   }else
1325   {
1326     const size_t readsize=32768;
1327     gbuffer.resize(readsize);
1328     for(int i;((i=bs.read(buffer+pos,readsize))>0);
1329       gbuffer.resize((pos+=i)+readsize))
1330       EMPTY_LOOP;
1331   }
1332   buffer[pos]=0;
1333 }
1334 
1335 GNativeString
getAsNative(void)1336 ByteStream::getAsNative(void)
1337 {
1338   char *buffer;
1339   GPBuffer<char> gbuffer(buffer);
1340   read_file(*this,buffer,gbuffer);
1341   return GNativeString(buffer);
1342 }
1343 
1344 GUTF8String
getAsUTF8(void)1345 ByteStream::getAsUTF8(void)
1346 {
1347   char *buffer;
1348   GPBuffer<char> gbuffer(buffer);
1349   read_file(*this,buffer,gbuffer);
1350   return GUTF8String(buffer);
1351 }
1352 
1353 
1354 #ifdef HAVE_NAMESPACES
1355 }
1356 # ifndef NOT_USING_DJVU_NAMESPACE
1357 using namespace DJVU;
1358 # endif
1359 #endif
1360 
1361 void
1362 DjVuPrintErrorUTF8(const char *fmt, ... )
1363 {
1364   G_TRY {
1365     GP<ByteStream> errout = ByteStream::get_stderr();
1366     if (errout)
1367       {
1368         errout->cp=ByteStream::NATIVE;
1369         va_list args;
1370         va_start(args, fmt);
1371         const GUTF8String message(fmt,args);
1372         errout->writestring(message);
1373       }
1374     // Need to catch all exceptions because these might be
1375     // called from an outer exception handler (with prejudice)
1376   } G_CATCH_ALL { } G_ENDCATCH;
1377 }
1378 
1379 void
1380 DjVuPrintErrorNative(const char *fmt, ... )
1381 {
1382   G_TRY {
1383     GP<ByteStream> errout = ByteStream::get_stderr();
1384     if (errout)
1385       {
1386         errout->cp=ByteStream::NATIVE;
1387         va_list args;
1388         va_start(args, fmt);
1389         const GNativeString message(fmt,args);
1390         errout->writestring(message);
1391       }
1392     // Need to catch all exceptions because these might be
1393     // called from an outer exception handler (with prejudice)
1394   } G_CATCH_ALL { } G_ENDCATCH;
1395 }
1396 
1397 void
1398 DjVuPrintMessageUTF8(const char *fmt, ... )
1399 {
1400   G_TRY {
1401     GP<ByteStream> strout = ByteStream::get_stdout();
1402     if (strout)
1403       {
1404         strout->cp=ByteStream::NATIVE;
1405         va_list args;
1406         va_start(args, fmt);
1407         const GUTF8String message(fmt,args);
1408         strout->writestring(message);
1409       }
1410     // Need to catch all exceptions because these might be
1411     // called from an outer exception handler (with prejudice)
1412   } G_CATCH_ALL { } G_ENDCATCH;
1413 }
1414 
1415 void
1416 DjVuPrintMessageNative(const char *fmt, ... )
1417 {
1418   G_TRY {
1419     GP<ByteStream> strout = ByteStream::get_stdout();
1420     if (strout)
1421       {
1422         strout->cp=ByteStream::NATIVE;
1423         va_list args;
1424         va_start(args, fmt);
1425         const GNativeString message(fmt,args);
1426         strout->writestring(message);
1427       }
1428     // Need to catch all exceptions because these might be
1429     // called from an outer exception handler (with prejudice)
1430   } G_CATCH_ALL { } G_ENDCATCH;
1431 }
1432