1 //C-  -*- C++ -*-
2 //C- -------------------------------------------------------------------
3 //C- DjVuLibre-3.5
4 //C- Copyright (c) 2002  Leon Bottou and Yann Le Cun.
5 //C- Copyright (c) 2001  AT&T
6 //C-
7 //C- This software is subject to, and may be distributed under, the
8 //C- GNU General Public License, either Version 2 of the license,
9 //C- or (at your option) any later version. The license should have
10 //C- accompanied the software or you may obtain a copy of the license
11 //C- from the Free Software Foundation at http://www.fsf.org .
12 //C-
13 //C- This program is distributed in the hope that it will be useful,
14 //C- but WITHOUT ANY WARRANTY; without even the implied warranty of
15 //C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 //C- GNU General Public License for more details.
17 //C-
18 //C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library from
19 //C- Lizardtech Software.  Lizardtech Software has authorized us to
20 //C- replace the original DjVu(r) Reference Library notice by the following
21 //C- text (see doc/lizard2002.djvu and doc/lizardtech2007.djvu):
22 //C-
23 //C-  ------------------------------------------------------------------
24 //C- | DjVu (r) Reference Library (v. 3.5)
25 //C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved.
26 //C- | The DjVu Reference Library is protected by U.S. Pat. No.
27 //C- | 6,058,214 and patents pending.
28 //C- |
29 //C- | This software is subject to, and may be distributed under, the
30 //C- | GNU General Public License, either Version 2 of the license,
31 //C- | or (at your option) any later version. The license should have
32 //C- | accompanied the software or you may obtain a copy of the license
33 //C- | from the Free Software Foundation at http://www.fsf.org .
34 //C- |
35 //C- | The computer code originally released by LizardTech under this
36 //C- | license and unmodified by other parties is deemed "the LIZARDTECH
37 //C- | ORIGINAL CODE."  Subject to any third party intellectual property
38 //C- | claims, LizardTech grants recipient a worldwide, royalty-free,
39 //C- | non-exclusive license to make, use, sell, or otherwise dispose of
40 //C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the
41 //C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU
42 //C- | General Public License.   This grant only confers the right to
43 //C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to
44 //C- | the extent such infringement is reasonably necessary to enable
45 //C- | recipient to make, have made, practice, sell, or otherwise dispose
46 //C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to
47 //C- | any greater extent that may be necessary to utilize further
48 //C- | modifications or combinations.
49 //C- |
50 //C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY
51 //C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
52 //C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF
53 //C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
54 //C- +------------------------------------------------------------------
55 
56 #ifndef _UNICODEBYTESTREAM_H_
57 #define _UNICODEBYTESTREAM_H_
58 #ifdef HAVE_CONFIG_H
59 #include "config.h"
60 #endif
61 #if NEED_GNUG_PRAGMAS
62 # pragma interface
63 #endif
64 
65 
66 /** @name UnicodeByteStream.h
67 
    Files #"UnicodeByteStream.h"# and #"UnicodeByteStream.cpp"#
    implement a parser for files structured as W3C Extensible Markup
    Language (XML) 1.0 (Second Edition).
71 
    Class \Ref{UnicodeByteStream} provides a way to read or write XML
    files.  Member functions provide an easy means to position the
    underlying \Ref{ByteStream}.
75 
76     {\bf References:}
77     W3C Extensible Markup Language (XML) 1.0 (Second Edition)
78     \URL{http://www.w3.org/TR/2000/REC-xml-20001006.html}
79 
80     @memo
81     XML file parser.
82     @author
83     Bill C Riemers <docbill@sourceforge.net>
84 */
85 //@{
86 
87 #include "DjVuGlobal.h"
88 #include "GString.h"
89 #include "ByteStream.h"
90 
91 #include <stddef.h>
92 
93 #ifdef HAVE_NAMESPACES
94 namespace DJVU {
95 # ifdef NOT_DEFINED // Just to fool emacs c++ mode
96 }
97 #endif
98 #endif
99 
100 
101 
/** ByteStream interface for a Unicode file.
103 
104     Class #UnicodeByteStream# augments the #ByteStream# interface with
105     functions for navigating Unicode documents.  It works in relation
106     with a ByteStream specified at construction time.
107 
    {\bf Reading a Unicode file} --- You can read a Unicode file by
    constructing a #UnicodeByteStream# object attached to the ByteStream
    containing the Unicode file.
111 
    {\bf Writing a Unicode file} --- You can write a Unicode file by
    constructing a #UnicodeByteStream# object attached to the seekable
    ByteStream object that will contain the XML file.
115 
116     Writing an XML file requires a seekable ByteStream (see
117     \Ref{ByteStream::is_seekable}).  This is not much of a problem because you
118     can always create the XML file into a \Ref{MemoryByteStream} and then use
119     \Ref{ByteStream::copy} to transfer the XML file into a non seekable
120     ByteStream.  */
121 
122 class UnicodeByteStream : public ByteStream
123 {
124 protected:
125   UnicodeByteStream(const UnicodeByteStream &bs);
126   UnicodeByteStream(GP<ByteStream> bs,
127     const GStringRep::EncodeType encodetype=GStringRep::XUTF8);
128 public:
129   /** Constructs an UnicodeByteStream object attached to ByteStream #bs#.
130       Any ByteStream can be used when reading an XML file.  Writing
131       an XML file however requires a seekable ByteStream. */
132   static GP<UnicodeByteStream> create(GP<ByteStream> bs,
133     const GStringRep::EncodeType encodetype=GStringRep::XUTF8)
134   { return new UnicodeByteStream(bs,encodetype); }
135 
136   // --- BYTESTREAM INTERFACE
137   ~UnicodeByteStream();
138   /// Sets the encoding type and seek's to position 0.
139   void set_encodetype(const GStringRep::EncodeType et=GStringRep::XUTF8);
140   void set_encoding(const GUTF8String &encoding);
141   /// Simmular to fgets(), except read aheads effect the tell() position.
142   virtual GUTF8String gets(size_t const t=0,unsigned long const stopat='\n',bool const inclusive=true);
143   /// Resets the gets buffering as well as physically seeking.
144   virtual int seek(long offset, int whence = SEEK_SET, bool nothrow=false);
145   /** Physically reads the specified bytes, and truncate the read ahead buffer.
146     */
147   virtual size_t read(void *buffer, size_t size);
148   /// Not correctly implimented...
149   virtual size_t write(const void *buffer, size_t size);
150   /// tell will tell you the read position, including read ahead for gets()...
151   virtual long tell(void) const;
152   /// Does a flush, and clears the read ahead buffer.
153   virtual void flush(void);
154 
155   /// Find out how many lines have been read with gets.
get_lines_read(void)156   int get_lines_read(void) const { return linesread; }
157 protected:
158   /// The real byte stream.
159   GP<ByteStream> bs;
160   GUTF8String buffer;
161   int bufferpos;
162   int linesread;
163   long startpos;
164 private:
165   // Cancel C++ default stuff
166   UnicodeByteStream & operator=(UnicodeByteStream &);
167 };
168 
169 
170 class XMLByteStream : public UnicodeByteStream
171 {
172 protected:
173   XMLByteStream(GP<ByteStream> &bs);
174   XMLByteStream(UnicodeByteStream &bs);
175   void init(void);
176 public:
177   static GP<XMLByteStream> create(GP<ByteStream> bs);
178   static GP<XMLByteStream> create(UnicodeByteStream &bs);
179   // --- BYTESTREAM INTERFACE
180   ~XMLByteStream();
181 };
182 
183 inline GP<XMLByteStream>
create(UnicodeByteStream & bs)184 XMLByteStream::create(UnicodeByteStream &bs)
185 {
186   return new XMLByteStream(bs);
187 }
188 
189 //@}
190 
191 
192 #ifdef HAVE_NAMESPACES
193 }
194 # ifndef NOT_USING_DJVU_NAMESPACE
195 using namespace DJVU;
196 # endif
197 #endif
198 #endif
199 
200