1 //C-  -*- C++ -*-
2 //C- -------------------------------------------------------------------
3 //C- DjVuLibre-3.5
4 //C- Copyright (c) 2002  Leon Bottou and Yann Le Cun.
5 //C- Copyright (c) 2001  AT&T
6 //C-
7 //C- This software is subject to, and may be distributed under, the
8 //C- GNU General Public License, either Version 2 of the license,
9 //C- or (at your option) any later version. The license should have
10 //C- accompanied the software or you may obtain a copy of the license
11 //C- from the Free Software Foundation at http://www.fsf.org .
12 //C-
13 //C- This program is distributed in the hope that it will be useful,
14 //C- but WITHOUT ANY WARRANTY; without even the implied warranty of
15 //C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 //C- GNU General Public License for more details.
17 //C-
18 //C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library from
19 //C- Lizardtech Software.  Lizardtech Software has authorized us to
20 //C- replace the original DjVu(r) Reference Library notice by the following
21 //C- text (see doc/lizard2002.djvu and doc/lizardtech2007.djvu):
22 //C-
23 //C-  ------------------------------------------------------------------
24 //C- | DjVu (r) Reference Library (v. 3.5)
25 //C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved.
26 //C- | The DjVu Reference Library is protected by U.S. Pat. No.
27 //C- | 6,058,214 and patents pending.
28 //C- |
29 //C- | This software is subject to, and may be distributed under, the
30 //C- | GNU General Public License, either Version 2 of the license,
31 //C- | or (at your option) any later version. The license should have
32 //C- | accompanied the software or you may obtain a copy of the license
33 //C- | from the Free Software Foundation at http://www.fsf.org .
34 //C- |
35 //C- | The computer code originally released by LizardTech under this
36 //C- | license and unmodified by other parties is deemed "the LIZARDTECH
37 //C- | ORIGINAL CODE."  Subject to any third party intellectual property
38 //C- | claims, LizardTech grants recipient a worldwide, royalty-free,
39 //C- | non-exclusive license to make, use, sell, or otherwise dispose of
40 //C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the
41 //C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU
42 //C- | General Public License.   This grant only confers the right to
43 //C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to
44 //C- | the extent such infringement is reasonably necessary to enable
45 //C- | recipient to make, have made, practice, sell, or otherwise dispose
46 //C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to
47 //C- | any greater extent that may be necessary to utilize further
48 //C- | modifications or combinations.
49 //C- |
50 //C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY
51 //C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
52 //C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF
53 //C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
54 //C- +------------------------------------------------------------------
55 
56 #ifndef _IFFBYTESTREAM_H_
57 #define _IFFBYTESTREAM_H_
58 #ifdef HAVE_CONFIG_H
59 #include "config.h"
60 #endif
61 #if NEED_GNUG_PRAGMAS
62 # pragma interface
63 #endif
64 
65 
66 /** @name IFFByteStream.h
67 
68     Files #"IFFByteStream.h"# and #"IFFByteStream.cpp"# implement a parser for
69     files structured according to the Electronic Arts ``EA IFF 85 Interchange
70     File Format''.  IFF files are composed of a sequence of data {\em chunks}.
71     Each chunk is identified by a four character {\em chunk identifier}
72     describing the type of the data stored in the chunk.  A few special chunk
73     identifiers, for instance #"FORM"#, are reserved for {\em composite
74     chunks} which themselves contain a sequence of data chunks.  This
75     convention effectively provides IFF files with a convenient hierarchical
76     structure.  Composite chunks are further identified by a secondary chunk
77     identifier.
78 
79     We found it convenient to define an {\em extended chunk identifier}.  In the
80     case of a regular chunk, the extended chunk identifier is simply the
81     chunk identifier, as in #"PM44"#. In the case of a composite chunk, the
82     extended chunk identifier is composed by concatenating the main chunk
83     identifier, a colon, and the secondary chunk identifier, as in
84     #"FORM:DJVU"#.
85 
86     Class \Ref{IFFByteStream} provides a way to read or write IFF structured
87     files.  Member functions provide an easy means to position the underlying
88     \Ref{ByteStream} at the beginning of each chunk and to read or write the
89     data until reaching the end of the chunk.  The utility program
90     \Ref{djvuinfo} demonstrates how to use class #IFFByteStream#.
91 
92     {\bf IFF Files and ZP-Coder} ---
93     Class #IFFByteStream# repositions the underlying ByteStream whenever a new
94     chunk is accessed.  It is possible to code chunk data with the ZP-Coder
95     without worrying about the final file position. See class \Ref{ZPCodec}
96     for more details.
97 
98     {\bf DjVu IFF Files} --- We had initially planned to exactly follow the
99     IFF specifications.  Then we realized that certain versions of MSIE
100     recognize any IFF file as a Microsoft AIFF sound file and pop a message
101     box "Cannot play that sound".  It appears that the structure of AIFF files
102     is entirely modeled after the IFF standard, with small variations
103     regarding the endianness of numbers and the padding rules.  We eliminate
104     this problem by casting the octet protection spell.  Our IFF files always
105     start with the four octets #0x41,0x54,0x26,0x54# followed by the fully
106     conformant IFF byte stream.  Class #IFFByteStream# silently skips these
107     four octets when it encounters them.
108 
109     {\bf References} --- EA IFF 85 Interchange File Format specification:\\
110     \URL{http://www.cica.indiana.edu/graphics/image_specs/ilbm.format.txt} or
111     \URL{http://www.tnt.uni-hannover.de/soft/compgraph/fileformats/docs/iff.pre}
112 
113     @memo
114     IFF file parser.
115     @author
116     L\'eon Bottou <leonb@research.att.com>
117 
118 // From: Leon Bottou, 1/31/2002
119 // This has been changed by Lizardtech to fit better
120 // with their re-implementation of ByteStreams.
121 
122 */
123 //@{
124 
125 
126 #include "DjVuGlobal.h"
127 #include <stddef.h>
128 #include <stdlib.h>
129 #include <stdio.h>
130 #include <string.h>
131 #include "GException.h"
132 #include "GString.h"
133 #include "ByteStream.h"
134 
135 
136 #ifdef HAVE_NAMESPACES
137 namespace DJVU {
138 # ifdef NOT_DEFINED // Just to fool emacs c++ mode
139 }
140 #endif
141 #endif
142 
143 /** ByteStream interface for an IFF file.
144 
145     Class #IFFByteStream# augments the #ByteStream# interface with
146     functions for navigating from chunk to chunk.  It works in relation
147     with a ByteStream specified at construction time.
148 
149     {\bf Reading an IFF file} --- You can read an IFF file by constructing an
150     #IFFByteStream# object attached to the ByteStream containing the IFF file.
151     Calling function \Ref{get_chunk} positions the file pointer at the
152     beginning of the first chunk.  You can then use \Ref{ByteStream::read} to
153     access the chunk data.  Function #read# will return #0# if you attempt to
154     read past the end of the chunk, just as if you were trying to read past
155     the end of a file. You can at any time call function \Ref{close_chunk} to
156     terminate reading data in this chunk.  The following chunks can be
157     accessed by calling #get_chunk# and #close_chunk# repeatedly until you
158     reach the end of the file.  Function #read# is not very useful when
159     accessing a composite chunk.  You can instead make nested calls to
160     functions #get_chunk# and #close_chunk# in order to access the chunks
161     located inside the composite chunk.
162 
163     {\bf Writing an IFF file} --- You can write an IFF file by constructing an
164     #IFFByteStream# object attached to the seekable ByteStream object that
165     will contain the IFF file.  Calling function \Ref{put_chunk} creates a
166     first chunk header and positions the file pointer at the beginning of the
167     chunk.  You can then use \Ref{ByteStream::write} to store the chunk data.
168     Calling function \Ref{close_chunk} terminates the current chunk.  You can
169     append more chunks by calling #put_chunk# and #close_chunk# repeatedly.
170     Function #write# is not very useful for writing a composite chunk.  You
171     can instead make nested calls to function #put_chunk# and #close_chunk# in
172     order to create chunks located inside the composite chunk.
173 
174     Writing an IFF file requires a seekable ByteStream (see
175     \Ref{ByteStream::is_seekable}).  This is not much of a problem because you
176     can always create the IFF file into a \Ref{MemoryByteStream} and then use
177     \Ref{ByteStream::copy} to transfer the IFF file into a non seekable
178     ByteStream.  */
179 
180 class DJVUAPI IFFByteStream : protected ByteStream::Wrapper
181 {
182 protected:
183   IFFByteStream(const GP<ByteStream> &bs, const int pos);
184 public:
185   /** Constructs an IFFByteStream object attached to ByteStream #bs#.
186       Any ByteStream can be used when reading an IFF file.  Writing
187       an IFF file however requires a seekable ByteStream. */
188   static GP<IFFByteStream> create(const GP<ByteStream> &bs);
189   // --- BYTESTREAM INTERFACE
190   ~IFFByteStream();
191   virtual size_t read(void *buffer, size_t size);
192   virtual size_t write(const void *buffer, size_t size);
193   virtual long tell(void) const;
194   // -- NAVIGATING CHUNKS
195   /** Enters a chunk for reading.  Function #get_chunk# returns zero when the
196       last chunk has already been accessed.  Otherwise it parses a chunk
197       header, positions the IFFByteStream at the beginning of the chunk data,
198       stores the extended chunk identifier into string #chkid#, and returns
199       the non zero chunk size.  The file offset of the chunk data may be
200       retrieved using function #tell#.  The chunk data can then be read using
201       function #read# until reaching the end of the chunk.  Advanced users may
202       supply two pointers to integer variables using arguments #rawoffsetptr#
203       and #rawsizeptr#. These variables will be overwritten with the offset
204       and the length of the file segment containing both the chunk header and
205       the chunk data. */
206   int get_chunk(GUTF8String &chkid, int *rawoffsetptr=0, int *rawsizeptr=0);
207   /** Enters a chunk for writing.  Function #put_chunk# prepares a chunk
208       header and positions the IFFByteStream at the beginning of the chunk
209       data.  Argument #chkid# defines a extended chunk identifier for this
210       chunk.  The chunk data can then be written using function #write#.  The
211       chunk is terminated by a matching call to function #close_chunk#.  When
212       #insertmagic# is non zero, function #put_chunk# inserts the bytes:
213       0x41, 0x54, 0x26, 0x54 before the chunk header, as discussed in
214       \Ref{IFFByteStream.h}. */
215   void put_chunk(const char *chkid, int insertmagic=0);
216   /** Leaves the current chunk.  This function leaves the chunk previously
217       entered by a matching call to #get_chunk# and #put_chunk#.  The
218       IFFByteStream is then ready to process the next chunk at the same
219       hierarchical level. */
220   void close_chunk();
221   /** This is identical to the above, plus it adds a seek to the start of
222       the next chunk.  This way we catch EOF errors with the current chunk.*/
223   void seek_close_chunk();
224   /** Returns true when it is legal to call #read# or #write#. */
225   int ready();
226   /** Returns true when the current chunk is a composite chunk. */
227   int composite();
228   /** Returns the current chunk identifier of the current chunk.  String
229       #chkid# is overwritten with the {\em extended chunk identifier} of the
230       current chunk.  The extended chunk identifier of a regular chunk is
231       simply the chunk identifier, as in #"PM44"#.  The extended chunk
232       identifier of a composite chunk is the concatenation of the chunk
233       identifier, of a semicolon #":"#, and of the secondary chunk identifier,
234       as in #"FORM:DJVU"#. */
235   void short_id(GUTF8String &chkid);
236   /** Returns the qualified chunk identifier of the current chunk.  String
237       #chkid# is overwritten with the {\em qualified chunk identifier} of the
238       current chunk.  The qualified chunk identifier of a composite chunk is
239       equal to the extended chunk identifier.  The qualified chunk identifier
240       of a regular chunk is composed by concatenating the secondary chunk
241       identifier of the closest #"FORM"# or #"PROP"# composite chunk
242       containing the current chunk, a dot #"."#, and the current chunk
243       identifier, as in #"DJVU.INFO"#.  According to the EA IFF 85 identifier
244       scoping rules, the qualified chunk identifier uniquely defines how the
245       chunk data should be interpreted. */
246   void full_id(GUTF8String &chkid);
247   /** Checks a potential chunk identifier.  This function categorizes the
248       chunk identifier formed by the first four characters of string #chkid#.
249       It returns #0# if this is a legal identifier for a regular chunk.  It
250       returns #+1# if this is a reserved composite chunk identifier.  It
251       returns #-1# if this is an illegal or otherwise reserved identifier
252       which should not be used.  */
253   static int check_id(const char *id);
get_bytestream(void)254   GP<ByteStream> get_bytestream(void) {return this;}
255   /** Copy data from another ByteStream.  A maximum of #size# bytes are read
256       from the ByteStream #bsfrom# and are written to the ByteStream #*this#
257       at the current position.  Less than #size# bytes may be written if an
258       end-of-file mark is reached on #bsfrom#.  This function returns the
259       total number of bytes copied.  Setting argument #size# to zero (the
260       default value) has a special meaning: the copying process will continue
261       until reaching the end-of-file mark on ByteStream #bsfrom#, regardless
262       of the number of bytes transferred.  */
263   size_t copy(ByteStream &bsfrom, size_t size=0)
264   { return get_bytestream()->copy(bsfrom,size); }
265   /** Flushes all buffers in the ByteStream.  Calling this function
266       guarantees that pending data have been actually written (i.e. passed to
267       the operating system). Class #ByteStream# provides a default
268       implementation which does nothing. */
flush(void)269   virtual void flush(void)
270   { ByteStream::Wrapper::flush(); }
271   /** This is a simple compare method.  The IFFByteStream may be read for
272       the sake of the comparison.  Since IFFByteStreams are non-seekable,
273       the stream is not valid for use after comparing, regardless of the
274       result. */
275   bool compare(IFFByteStream &iff);
276   /** #has_magic_att# is true if the stream has
277       the DjVu magic 'AT&T' marker. */
278   bool has_magic_att;
279   /** #has_magic_sdjv# is true if the stream has
280       the Celartem magic 'SDJV' marker. */
281   bool has_magic_sdjv;
282 private:
283   // private datatype
284   struct IFFContext
285   {
286     IFFContext *next;
287     long offStart;
288     long offEnd;
289     char idOne[4];
290     char idTwo[4];
291     char bComposite;
292   };
293   // Implementation
294   IFFContext *ctx;
295   long offset;
296   long seekto;
297   int dir;
298   // Cancel C++ default stuff
299   IFFByteStream(const IFFByteStream &);
300   IFFByteStream & operator=(const IFFByteStream &);
301   static GP<IFFByteStream> create(ByteStream *bs);
302 };
303 
304 //@}
305 
306 
307 
308 #ifdef HAVE_NAMESPACES
309 }
310 # ifndef NOT_USING_DJVU_NAMESPACE
311 using namespace DJVU;
312 # endif
313 #endif
314 #endif
315