1 //C-  -*- C++ -*-
2 //C- -------------------------------------------------------------------
3 //C- DjVuLibre-3.5
4 //C- Copyright (c) 2002  Leon Bottou and Yann Le Cun.
5 //C- Copyright (c) 2001  AT&T
6 //C-
7 //C- This software is subject to, and may be distributed under, the
8 //C- GNU General Public License, either Version 2 of the license,
9 //C- or (at your option) any later version. The license should have
10 //C- accompanied the software or you may obtain a copy of the license
11 //C- from the Free Software Foundation at http://www.fsf.org .
12 //C-
13 //C- This program is distributed in the hope that it will be useful,
14 //C- but WITHOUT ANY WARRANTY; without even the implied warranty of
15 //C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 //C- GNU General Public License for more details.
17 //C-
18 //C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library from
19 //C- Lizardtech Software.  Lizardtech Software has authorized us to
20 //C- replace the original DjVu(r) Reference Library notice by the following
21 //C- text (see doc/lizard2002.djvu and doc/lizardtech2007.djvu):
22 //C-
23 //C-  ------------------------------------------------------------------
24 //C- | DjVu (r) Reference Library (v. 3.5)
25 //C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved.
26 //C- | The DjVu Reference Library is protected by U.S. Pat. No.
27 //C- | 6,058,214 and patents pending.
28 //C- |
29 //C- | This software is subject to, and may be distributed under, the
30 //C- | GNU General Public License, either Version 2 of the license,
31 //C- | or (at your option) any later version. The license should have
32 //C- | accompanied the software or you may obtain a copy of the license
33 //C- | from the Free Software Foundation at http://www.fsf.org .
34 //C- |
35 //C- | The computer code originally released by LizardTech under this
36 //C- | license and unmodified by other parties is deemed "the LIZARDTECH
37 //C- | ORIGINAL CODE."  Subject to any third party intellectual property
38 //C- | claims, LizardTech grants recipient a worldwide, royalty-free,
39 //C- | non-exclusive license to make, use, sell, or otherwise dispose of
40 //C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the
41 //C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU
42 //C- | General Public License.   This grant only confers the right to
43 //C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to
44 //C- | the extent such infringement is reasonably necessary to enable
45 //C- | recipient to make, have made, practice, sell, or otherwise dispose
46 //C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to
47 //C- | any greater extent that may be necessary to utilize further
48 //C- | modifications or combinations.
49 //C- |
50 //C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY
51 //C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
52 //C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF
53 //C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
54 //C- +------------------------------------------------------------------
55 
56 #ifndef _JB2IMAGE_H
57 #define _JB2IMAGE_H
58 #ifdef HAVE_CONFIG_H
59 #include "config.h"
60 #endif
61 #if NEED_GNUG_PRAGMAS
62 # pragma interface
63 #endif
64 
65 /** @name JB2Image.h
66 
67     Files #"JB2Image.h"# and #"JB2Image.cpp"# address the compression of
68     bilevel images using the JB2 soft pattern matching scheme.  These files
69     provide the complete decoder and the encoder back-end.  The JB2 scheme is
70     optimized for images containing a large number of self-similar small
71     components such as characters.  Typical text images can be compressed into
72     files 3 to 5 times smaller than with G4/MMR and 2 to 4 times smaller than
73     with JBIG1.
74 
75     {\bf JB2 and JBIG2} --- JB2 has strong similarities with the forthcoming
76     JBIG2 standard developed by the "ISO/IEC JTC1 SC29 Working Group 1" which
77     is responsible for both the JPEG and JBIG standards.  This is hardly
78     surprising since JB2 was our own proposal for the JBIG2 standard
79     and remained the only proposal for years.  The full JBIG2 standard however
80     is significantly more complex and slightly less efficient than JB2 because
81     it addresses a broader range of applications.  Full JBIG2 compliance may
82     be implemented in the future.
83 
84     {\bf JB2 Images} --- Class \Ref{JB2Image} is the central data structure
85     implemented here.  A #JB2Image# is composed of an array of shapes
86     and an array of blits.  Each shape contains a small bitmap representing an
87     elementary blob of ink, such as a character or a segment of line art.
88     Each blit instructs the decoder to render a particular shape at a
89     specified position in the image.  Some compression is already achieved
90     because several blits can refer to the same shape.  A shape can also
91     contain a pointer to a parent shape.  Additional compression is achieved
92     when both shapes are similar because each shape is encoded using the
93     parent shape as a model.  A #"O"# shape for instance could be a parent for
94     both a #"C"# shape and a #"Q"# shape.
95 
96     {\bf JB2 Dictionary} --- Class \Ref{JB2Dict} is a peculiar kind of
97     JB2Image which only contains an array of shapes.  These shapes can be
98     referenced from another JB2Dict/JB2Image.  This is arranged by setting the
99     ``inherited dictionary'' of a JB2Dict/JB2Image using function
100     \Ref{JB2Dict::set_inherited_dict}. Several JB2Images can use shapes from
101     the same separately encoded JB2Dict.  This is how several pages of the
102     same document can share information.
103 
104     {\bf Decoding JB2 data} --- The first step for decoding JB2 data consists of
105     creating an empty #JB2Image# object.  Function \Ref{JB2Image::decode} then
106     reads the data and populates the #JB2Image# with the shapes and the blits.
107     Function \Ref{JB2Image::get_bitmap} finally produces an anti-aliased image.
108 
109     {\bf Encoding JB2 data} --- The first step for encoding JB2 data also
110     consists of creating an empty #JB2Image# object.  You must then use
111     functions \Ref{JB2Image::add_shape} and \Ref{JB2Image::add_blit} to
112     populate the #JB2Image# object.  Function \Ref{JB2Image::encode} finally
113     produces the JB2 data.  Function #encode# sequentially encodes the blits
114     and the necessary shapes.  The compression ratio depends on several
115     factors:
116     \begin{itemize}
117     \item Blits should reuse shapes as often as possible.
118     \item Blits should be sorted in reading order because this facilitates
119           the prediction of the blit coordinates.
120     \item Shapes should be sorted according to the order of first appearance
121           in the sequence of blits because this facilitates the prediction of the
122           shape indices.
123     \item Shapes should be compared to all previous shapes in the shape array.
124           The shape parent pointer should be set to a suitable parent shape if
125           such a parent shape exists.  The parent shape should have almost the
126           same size and the same pixels.
127     \end{itemize}
128     All this is quite easy to achieve in the case of an electronically
129     produced document such as a DVI file or a PS file: we know what the
130     characters are and where they are located.  If you only have a scanned
131     image however you must first locate the characters (connected component
132     analysis) and cut the remaining pieces of ink into smaller blobs.
133     Ordering the blits and matching the shapes is then an essentially
134     heuristic process.  Although the quality of the heuristics substantially
135     affects the file size, misordering blits or mismatching shapes never
136     affects the quality of the image.  The last refinement consists in
137     smoothing the shapes in order to reduce the noise and maximize the
138     similarities between shapes.
139 
140     {\bf JB2 extensions} --- Two extensions of the JB2
141     encoding format have been introduced with DjVu files version 21. The first
142     extension addresses the shared shape dictionaries. The second extension
143     bounds the number of probability contexts used for coding numbers.
144     Both extensions maintain backward compatibility with JB2 as
145     described in the ICFDD proposal. A more complete discussion
146     can be found in section \Ref{JB2 extensions for version 21.}.
147 
148     {\bf References}
149     \begin{itemize}
150     \item Paul G. Howard : {\em Text image compression using soft
151           pattern matching}, Computer Journal, volume 40:2/3, 1997.
152     \item JBIG1 : \URL{http://www.jpeg.org/public/jbighomepage.htm}.
153     \item JBIG2 draft : \URL{http://www.jpeg.org/public/jbigpt2.htm}.
154     \item ICFDD Draft Proposed American National Standard, 1999-08-26.
155     \end{itemize}
156 
157     @memo
158     Coding bilevel images with JB2.
159     @author
160     Paul Howard <pgh@research.att.com> -- JB2 design\\
161     L\'eon Bottou <leonb@research.att.com> -- this implementation
162 
163 // From: Leon Bottou, 1/31/2002
164 // Lizardtech has split the corresponding cpp file into a decoder and an encoder.
165 // Only superficial changes.  The meat is mine.
166 
167 */
168 //@{
169 
170 
171 #include "GString.h"
172 #include "ZPCodec.h"
173 
174 
175 #ifdef HAVE_NAMESPACES
176 namespace DJVU {
177 # ifdef NOT_DEFINED // Just to fool emacs c++ mode
178 }
179 #endif
180 #endif
181 
// Forward declarations of the classes named by the declarations below.
class ByteStream;
class GBitmap;
class GRect;
class JB2Dict;
class JB2Image;
187 
188 /** Blit data structure.  A #JB2Image# contains an array of #JB2Blit# data
189     structures.  Each array entry instructs the decoder to render a particular
190     shape at a particular location.  Members #left# and #bottom# specify the
191     coordinates of the bottom left corner of the shape bitmap.  All
192     coordinates are relative to the bottom left corner of the image.  Member
193     #shapeno# is the subscript of the shape to be rendered.  */
194 
195 class DJVUAPI JB2Blit {
196 public:
197   /** Horizontal coordinate of the blit. */
198   unsigned short left;
199   /** Vertical coordinate of the blit. */
200   unsigned short bottom;
201   /** Index of the shape to blit. */
202   unsigned int shapeno;
203 };
204 
205 
206 /** Shape data structure.  A #JB2Image# contains an array of #JB2Shape# data
207     structures.  Each array entry represents an elementary blob of ink such as
208     a character or a segment of line art.  Member #bits# points to a bilevel
209     image representing the shape pixels.  Member #parent# is the subscript of
210     the parent shape.  */
211 
212 class DJVUAPI JB2Shape
213 {
214 public:
215   /** Subscript of the parent shape.  The parent shape must always be located
216       before the current shape in the shape array.  A negative value indicates
217       that this shape has no parent.  Any negative values smaller than #-1#
218       further indicates that this shape does not look like a character.  This
219       is used to enable a few internal optimizations.  This information is
220       saved into the JB2 file, but the actual value of the #parent# variable
221       is not. */
222   int parent;
223   /** Bilevel image of the shape pixels.  This must be a pointer to a bilevel
224       #GBitmap# image.  This pointer can also be null. The encoder will just
225       silently discard all blits referring to a shape containing a null
226       bitmap. */
227   GP<GBitmap> bits;
228   /** Private user data. This long word is provided as a convenience for users
229       of the JB2Image data structures.  Neither the rendering functions nor
230       the coding functions ever access this value. */
231   long userdata;
232 };
233 
234 
235 
236 /** JB2 Dictionary callback.
237     The decoding function call this callback function when they discover that
238     the current JB2Image or JB2Dict needs a pre-existing shape dictionary.
239     The callback function must return a pointer to the dictionary or NULL
240     if none is found. */
241 
242 typedef GP<JB2Dict> JB2DecoderCallback ( void* );
243 
244 
245 /** Dictionary of JB2 shapes. */
246 
247 class DJVUAPI JB2Dict : public GPEnabled
248 {
249 protected:
250   JB2Dict(void);
251 public:
252   class JB2Codec;
253 
254   // CONSTRUCTION
255   /** Default creator.  Constructs an empty #JB2Dict# object.  You can then
256       call the decoding function #decode#.  You can also manually set the
257       image size using #add_shape#. */
258   static GP<JB2Dict> create(void);
259 
260   // INITIALIZATION
261   /** Resets the #JB2Image# object.  This function reinitializes both the shape
262      and the blit arrays.  All allocated memory is freed. */
263   void init(void);
264 
265   // INHERITED
266   /** Returns the inherited dictionary. */
267   GP<JB2Dict> get_inherited_dict(void) const;
268   /** Returns the number of inherited shapes. */
269   int get_inherited_shape_count(void) const;
270   /** Sets the inherited dictionary. */
271   void set_inherited_dict(const GP<JB2Dict> &dict);
272 
273   // ACCESSING THE SHAPE LIBRARY
274   /** Returns the total number of shapes.
275       Shape indices range from #0# to #get_shape_count()-1#. */
276   int get_shape_count(void) const;
277   /** Returns a pointer to shape #shapeno#.
278       The returned pointer directly points into the shape array.
279       This pointer can be used for reading or writing the shape data. */
280   JB2Shape &get_shape(const int shapeno);
281   /** Returns a constant pointer to shape #shapeno#.
282       The returned pointer directly points into the shape array.
283       This pointer can only be used for reading the shape data. */
284   const JB2Shape &get_shape(const int shapeno) const;
285   /** Appends a shape to the shape array.  This function appends a copy of
286       shape #shape# to the shape array and returns the subscript of the new
287       shape.  The subscript of the parent shape #shape.parent# must
288       actually designate an already existing shape. */
289   int  add_shape(const JB2Shape &shape);
290 
291   // MEMORY OPTIMIZATION
292   /** Compresses all shape bitmaps.  This function reduces the memory required
293       by the #JB2Image# by calling \Ref{GBitmap::compress} on all shapes
294       bitmaps.  This function is best called after decoding a #JB2Image#,
295       because function \Ref{get_bitmap} can directly use the compressed
296       bitmaps.  */
297   void compress(void);
298   /** Returns the total memory used by the JB2Image.
299       The returned value is expressed in bytes. */
300   unsigned int get_memory_usage(void) const;
301 
302   // CODING
303   /** Encodes the JB2Dict into ByteStream #bs#.
304       This function generates the JB2 data stream without any header.   */
305   void encode(const GP<ByteStream> &gbs) const;
306   /** Decodes JB2 data from ByteStream #bs#. This function decodes the image
307       size and populates the shape and blit arrays.  The callback function
308       #cb# is called when the decoder determines that the ByteStream data
309       requires a shape dictionary which has not been set with
310       \Ref{JB2Dict::set_inherited_dict}. The callback receives argument #arg#
311       and must return a suitable dictionary which will be installed as the
312       inherited dictionary.  The callback should return null if no such
313       dictionary is found. */
314   void decode(const GP<ByteStream> &gbs, JB2DecoderCallback *cb=0, void *arg=0);
315 
316 
317 public:
318   /** Comment string coded by JB2 file. */
319   GUTF8String comment;
320 
321 
322 private:
323   friend class JB2Codec;
324   int inherited_shapes;
325   GP<JB2Dict> inherited_dict;
326   GArray<JB2Shape> shapes;
327 
328   struct LibRect {
329     int top,left,right,bottom;
330     void compute_bounding_box(const GBitmap &cbm);
331   };
332   GTArray<LibRect> boxes;
333   void get_bounding_box(int shapeno, LibRect &dest);
334 };
335 
336 /** Main JB2 data structure.  Each #JB2Image# consists of an array of shapes
337     and an array of blits.  These arrays can be populated by hand using
338     functions \Ref{add_shape} and \Ref{add_blit}, or by decoding JB2 data
339     using function \Ref{decode}.  You can then use function \Ref{get_bitmap}
340     to render anti-aliased images, or use function \Ref{encode} to generate
341     JB2 data. */
342 
343 class DJVUAPI JB2Image : public JB2Dict
344 {
345 protected:
346   JB2Image(void);
347 public:
348 
349   /** Creates an empty #JB2Image# object.  You can then
350       call the decoding function #decode#.  You can also manually set the
351       image size using #set_dimension# and populate the shape and blit arrays
352       using #add_shape# and #add_blit#. */
create(void)353   static GP<JB2Image> create(void) { return new JB2Image(); }
354 
355   // INITIALIZATION
356   /** Resets the #JB2Image# object.  This function reinitializes both the shape
357      and the blit arrays.  All allocated memory is freed. */
358   void init(void);
359 
360   // DIMENSION
361   /** Returns the width of the image.
362       This is the width value previously set with #set_dimension#. */
363   int get_width(void) const;
364   /** Returns the height of the image.
365       This is the height value previously set with #set_dimension#. */
366   int get_height(void) const;
367   /** Sets the size of the JB2Image.
368       This function can be called at any time.
369       The corresponding #width# and the #height# are stored
370       in the JB2 file. */
371   void set_dimension(int width, int height);
372 
373   // RENDERING
374   /** Renders an anti-aliased gray level image.  This function renders the
375       JB2Image as a bilevel or gray level image.  Argument #subsample#
376       specifies the desired subsampling ratio in range #1# to #15#.  The
377       returned image uses #1+subsample^2# gray levels for representing
378       anti-aliased edges.  Argument #align# specified the alignment of the
379       rows of the returned images.  Setting #align# to #4#, for instance, will
380       adjust the bitmap border in order to make sure that each row of the
381       returned image starts on a word (four byte) boundary. */
382   GP<GBitmap> get_bitmap(int subsample = 1, int align = 1) const;
383   /** Renders an anti-aliased gray level sub-image.  This function renders a
384       segment of the JB2Image as a bilevel or gray level image.  Conceptually,
385       this function first renders the full JB2Image with subsampling ratio
386       #subsample# and then extracts rectangle #rect# in the subsampled image.
387       Both operations of course are efficiently performed simultaneously.
388       Argument #align# specified the alignment of the rows of the returned
389       images, as explained above.  Argument #dispy# should remain null. */
390   GP<GBitmap> get_bitmap(const GRect &rect, int subsample=1, int align=1, int dispy=0) const;
391 
392   // ACCESSING THE BLIT LIBRARY
393   /** Returns the total number of blits.
394       Blit indices range from #0# to #get_blit_count(void)-1#. */
395   int get_blit_count(void) const;
396   /** Returns a pointer to blit #blitno#.
397       The returned pointer directly points into the blit array.
398       This pointer can be used for reading or writing the blit data. */
399   JB2Blit *get_blit(int blitno);
400   /** Returns a constant pointer to blit #blitno#.
401       The returned pointer directly points into the shape array.
402       This pointer can only be used for reading the shape data. */
403   const JB2Blit *get_blit(int blitno) const;
404   /** Appends a blit to the blit array.  This function appends a copy of blit
405       #blit# to the blit array and returns the subscript of the new blit.  The
406       shape subscript #blit.shapeno# must actually designate an already
407       existing shape. */
408   int  add_blit(const JB2Blit &blit);
409 
410   // MEMORY OPTIMIZATION
411   /** Returns the total memory used by the JB2Image.
412       The returned value is expressed in bytes. */
413   unsigned int get_memory_usage(void) const;
414 
415   // CODING
416   /** Encodes the JB2Image into ByteStream #bs#.
417       This function generates the JB2 data stream without any header. */
418   void encode(const GP<ByteStream> &gbs) const;
419   /** Decodes JB2 data from ByteStream #bs#. This function decodes the image
420       size and populates the shape and blit arrays.  The callback function
421       #cb# is called when the decoder determines that the ByteStream data
422       requires a shape dictionary which has not been set with
423       \Ref{JB2Dict::set_inherited_dict}. The callback receives argument #arg#
424       and must return a suitable dictionary which will be installed as the
425       inherited dictionary.  The callback should return null if no such
426       dictionary is found. */
427   void decode(const GP<ByteStream> &gbs, JB2DecoderCallback *cb=0, void *arg=0);
428 
429 private:
430   // Implementation
431   int width;
432   int height;
433   GTArray<JB2Blit> blits;
434 public:
435   /** Reproduces a old bug.  Setting this flag may be necessary for accurately
436       decoding DjVu files with version smaller than #18#.  The default value
437       is of couse #false#. */
438   bool reproduce_old_bug;
439 };
440 
441 
442 
443 // JB2DICT INLINE FUNCTIONS
444 
445 inline int
get_shape_count(void)446 JB2Dict::get_shape_count(void) const
447 {
448   return inherited_shapes + shapes.size();
449 }
450 
451 inline int
get_inherited_shape_count(void)452 JB2Dict::get_inherited_shape_count(void) const
453 {
454   return inherited_shapes;
455 }
456 
457 inline GP<JB2Dict>
get_inherited_dict(void)458 JB2Dict::get_inherited_dict(void) const
459 {
460   return inherited_dict;
461 }
462 
463 // JB2IMAGE INLINE FUNCTIONS
464 
465 inline int
get_width(void)466 JB2Image::get_width(void) const
467 {
468   return width;
469 }
470 
471 inline int
get_height(void)472 JB2Image::get_height(void) const
473 {
474   return height;
475 }
476 
477 
478 inline int
get_blit_count(void)479 JB2Image::get_blit_count(void) const
480 {
481   return blits.size();
482 }
483 
484 
485 inline JB2Blit *
get_blit(int blitno)486 JB2Image::get_blit(int blitno)
487 {
488   return & blits[blitno];
489 }
490 
491 inline const JB2Blit *
get_blit(int blitno)492 JB2Image::get_blit(int blitno) const
493 {
494   return & blits[blitno];
495 }
496 
497 
498 
499 
500 
501 /** @name JB2 extensions for version 21.
502 
503     Two extensions of the JB2 encoding format have been introduced
504     with DjVu files version 21.  Both extensions maintain significant
505     backward compatibility with previous versions of the JB2 format.
506     These extensions are described below by reference to the ICFDD
507     proposal dated August 1999.  Both extensions make use of the unused
508     record type value #9# (cf. ICFDD page 24) which has been renamed
509     #REQUIRED_DICT_OR_RESET#.
510 
511     {\bf Shared Shape Dictionaries} --- This extension provides
512     support for sharing symbol definitions between the pages of a
513     document.  To achieve this objective, the JB2 image data chunk
514     must be able to address symbols defined elsewhere by a JB2
515     dictionary data chunk shared by all the pages of a document.
516 
517     The arithmetically encoded JB2 image data logically consist of a
518     sequence of records. The decoder processes these records in
519     sequence and maintains a library of symbols which can be addressed
520     by the following records.  The first record usually is a ``Start
521     Of Image'' record describing the size of the image.
522 
523     Starting with version 21, a #REQUIRED_DICT_OR_RESET# (9) record
524     type can appear {\em before} the #START_OF_DATA# (0) record.  The
525     record type field is followed by a single number arithmetically
526     encoded (cf. ICFDD page 26) using a sixteenth context (cf. ICFDD
527     page 25).  This record appears when the JB2 data chunk requires
528     symbols encoded in a separate JB2 dictionary data chunk.  The
529     number (the {\bf dictionary size}) indicates how many symbols
530     should have been defined by the JB2 dictionary data chunk.  The
531     decoder should simply load these symbols in the symbol library and
532     proceed as usual.  New symbols potentially defined by the
533     subsequent JB2 image data records will therefore be numbered with
534     integers greater than or equal to the dictionary size.
535 
536     The JB2 dictionary data format is a pure subset of the JB2 image
537     data format.  The #START_OF_DATA# (0) record always specifies an
538     image width of zero and an image height of zero.  The only allowed
539     record types are those defining library symbols only
540     (#NEW_SYMBOL_LIBRARY_ONLY# (2) and #MATCHED_REFINE_LIBRARY_ONLY#
541     (5) cf. ICFDD page 24) followed by a final #END_OF_DATA# (11)
542     record.
543 
544     The JB2 dictionary data is usually located in a {\bf Djbz} chunk.
545     Each page {\bf FORM:DJVU} may directly contain a {\bf Djbz} chunk,
546     or may indirectly point to such a chunk using an {\bf INCL} chunk
547     (cf. \Ref{Multipage DjVu documents.}).
548 
549 
550     {\bf Numcoder Reset} --- This extension addresses a problem for
551     hardware implementations.  The encoding of numbers (cf. ICFDD page
552     26) potentially uses an unbounded number of binary coding
553     contexts. These contexts are normally allocated when they are used
554     for the first time (cf. ICFDD informative note, page 27).
555 
556     Starting with version 21, a #REQUIRED_DICT_OR_RESET# (9) record
557     type can appear {\em after} the #START_OF_DATA# (0) record.  The
558     decoder should proceed with the next record after {\em clearing
559     all binary contexts used for coding numbers}.  This operation
560     implies that all binary contexts previously allocated for coding
561     numbers can be deallocated.
562 
563     Starting with version 21, the JB2 encoder should insert a
564     #REQUIRED_DICT_OR_RESET# record type whenever the number of these
565     allocated binary contexts exceeds #20000#.  Only very large
566     documents ever reach such a large number of allocated binary
567     contexts (e.g. large maps).  Hardware implementations however can
568     benefit greatly from a hard bound on the total number of binary
569     coding contexts.  Old JB2 decoders will treat this record type as
570     an #END_OF_DATA# record and cleanly stop decoding (cf. ICFDD page
571     30, Image refinement data).
572 
573 
574     {\bf References} ---
575     \begin{itemize}
576     \item ICFDD Draft Proposed American National Standard, 1999-08-26.
577     \item DjVu Specification, \URL{http://www.lizardtech.com/djvu/sci/djvuspec}.
578     \end{itemize}
579 
580     @memo Extensions to the JB2 format introduced in version 21.  */
581 
582 //@}
583 
584 ////////////////////////////////////////
585 //// CLASS JB2CODEC:  DECLARATION
586 ////////////////////////////////////////
587 
588 // This class is accessed via the encode and decode
589 // functions of class JB2Image
590 
591 
592 //**** Class JB2Codec
593 // This class implements the base class for both the JB2 coder and decoder.
594 // The JB2Codec contains all contextual information for encoding/decoding
595 // a JB2Image.
596 
597 class JB2Dict::JB2Codec
598 {
599 public:
600   class Decode;
601   class Encode;
602   typedef unsigned int NumContext;
603   virtual ~JB2Codec();
604 protected:
605   // Constructors
606   JB2Codec(const bool xencoding=false);
607   // Forbidden assignment
608   JB2Codec(const JB2Codec &ref);
609   JB2Codec& operator=(const JB2Codec &ref);
610 
611   int CodeNum(int lo, int hi, NumContext *pctx,int v);
612   void reset_numcoder(void);
613   inline void code_eventual_lossless_refinement(void);
614   void init_library(JB2Dict &jim);
615   int add_library(const int shapeno, JB2Shape &jshp);
616   void code_relative_location(JB2Blit *jblt, int rows, int columns);
617   void code_bitmap_directly (GBitmap &bm);
618   void code_bitmap_by_cross_coding (GBitmap &bm, GP<GBitmap> &cbm, const int libno);
619   void code_record(int &rectype, const GP<JB2Dict> &jim, JB2Shape *jshp);
620   void code_record(int &rectype, const GP<JB2Image> &jim, JB2Shape *jshp, JB2Blit *jblt);
621   static void compute_bounding_box(GBitmap &cbm, LibRect &lrect);
622   static int get_direct_context( unsigned char const * const up2,
623     unsigned char const * const up1, unsigned char const * const up0,
624     const int column);
625   static int shift_direct_context ( const int context,
626     const int next, unsigned char const * const up2,
627     unsigned char const * const up1, unsigned char const * const up0,
628     const int column);
629   static int get_cross_context( unsigned char const * const up1,
630     unsigned char const * const up0, unsigned char const * const xup1,
631     unsigned char const * const xup0, unsigned char const * const xdn1,
632     const int column );
633   static int shift_cross_context( const int context,
634     const int n, unsigned char const * const up1,
635     unsigned char const * const up0, unsigned char const * const xup1,
636     unsigned char const * const xup0, unsigned char const * const xdn1,
637     const int column );
638 
639   virtual bool CodeBit(const bool bit, BitContext &ctx) = 0;
640   virtual void code_comment(GUTF8String &comment) = 0;
641   virtual void code_record_type(int &rectype) = 0;
642   virtual int code_match_index(int &index, JB2Dict &jim)=0;
643   virtual void code_inherited_shape_count(JB2Dict &jim)=0;
644   virtual void code_image_size(JB2Dict &jim);
645   virtual void code_image_size(JB2Image &jim);
646   virtual void code_absolute_location(JB2Blit *jblt,  int rows, int columns)=0;
647   virtual void code_absolute_mark_size(GBitmap &bm, int border=0) = 0;
648   virtual void code_relative_mark_size(GBitmap &bm, int cw, int ch, int border=0) = 0;
649   virtual void code_bitmap_directly(GBitmap &bm,const int dw, int dy,
650     unsigned char *up2, unsigned char *up1, unsigned char *up0 )=0;
651   virtual void code_bitmap_by_cross_coding (GBitmap &bm, GBitmap &cbm,
652     const int xd2c, const int dw, int dy, int cy,
653     unsigned char *up1, unsigned char *up0, unsigned char *xup1,
654     unsigned char *xup0, unsigned char *xdn1 )=0;
655   // Code records
656   virtual int get_diff(const int x_diff,NumContext &rel_loc) = 0;
657 
658 private:
659   bool encoding;
660 
661 protected:
662   // NumCoder
663   int cur_ncell;
664   BitContext *bitcells;
665   GPBuffer<BitContext> gbitcells;
666   NumContext *leftcell;
667   GPBuffer<NumContext> gleftcell;
668   NumContext *rightcell;
669   GPBuffer<NumContext> grightcell;
670   // Info
671   bool refinementp;
672   char gotstartrecordp;
673   // Code comment
674   NumContext dist_comment_byte;
675   NumContext dist_comment_length;
676   // Code values
677   NumContext dist_record_type;
678   NumContext dist_match_index;
679   BitContext dist_refinement_flag;
680   // Library
681   GTArray<int> shape2lib;
682   GTArray<int> lib2shape;
683   GTArray<LibRect> libinfo;
684   // Code pairs
685   NumContext abs_loc_x;
686   NumContext abs_loc_y;
687   NumContext abs_size_x;
688   NumContext abs_size_y;
689   NumContext image_size_dist;
690   NumContext inherited_shape_count_dist;
691   BitContext offset_type_dist;
692   NumContext rel_loc_x_current;
693   NumContext rel_loc_x_last;
694   NumContext rel_loc_y_current;
695   NumContext rel_loc_y_last;
696   NumContext rel_size_x;
697   NumContext rel_size_y;
698   int last_bottom;
699   int last_left;
700   int last_right;
701   int last_row_bottom;
702   int last_row_left;
703   int image_columns;
704   int image_rows;
705   int short_list[3];
706   int short_list_pos;
707   inline void fill_short_list(const int v);
708   int update_short_list(const int v);
709   // Code bitmaps
710   BitContext bitdist[1024];
711   BitContext cbitdist[2048];
712 };
713 
714 inline void
code_eventual_lossless_refinement(void)715 JB2Dict::JB2Codec::code_eventual_lossless_refinement(void)
716 {
717   refinementp=CodeBit(refinementp, dist_refinement_flag);
718 }
719 
720 inline void
fill_short_list(const int v)721 JB2Dict::JB2Codec::fill_short_list(const int v)
722 {
723   short_list[0] = short_list[1] = short_list[2] = v;
724   short_list_pos = 0;
725 }
726 
727 inline int
get_direct_context(unsigned char const * const up2,unsigned char const * const up1,unsigned char const * const up0,const int column)728 JB2Dict::JB2Codec::get_direct_context( unsigned char const * const up2,
729                     unsigned char const * const up1,
730                     unsigned char const * const up0,
731                     const int column)
732 {
733   return ( (up2[column - 1] << 9) |
734               (up2[column    ] << 8) |
735               (up2[column + 1] << 7) |
736               (up1[column - 2] << 6) |
737               (up1[column - 1] << 5) |
738               (up1[column    ] << 4) |
739               (up1[column + 1] << 3) |
740               (up1[column + 2] << 2) |
741               (up0[column - 2] << 1) |
742               (up0[column - 1] << 0) );
743 }
744 
745 inline int
shift_direct_context(const int context,const int next,unsigned char const * const up2,unsigned char const * const up1,unsigned char const * const up0,const int column)746 JB2Dict::JB2Codec::shift_direct_context(const int context, const int next,
747                      unsigned char const * const up2,
748                      unsigned char const * const up1,
749                      unsigned char const * const up0,
750                      const int column)
751 {
752   return ( ((context << 1) & 0x37a) |
753               (up1[column + 2] << 2)   |
754               (up2[column + 1] << 7)   |
755               (next << 0)              );
756 }
757 
758 inline int
get_cross_context(unsigned char const * const up1,unsigned char const * const up0,unsigned char const * const xup1,unsigned char const * const xup0,unsigned char const * const xdn1,const int column)759 JB2Dict::JB2Codec::get_cross_context( unsigned char const * const up1,
760                    unsigned char const * const up0,
761                    unsigned char const * const xup1,
762                    unsigned char const * const xup0,
763                    unsigned char const * const xdn1,
764                    const int column )
765 {
766   return ( ( up1[column - 1] << 10) |
767               ( up1[column    ] <<  9) |
768               ( up1[column + 1] <<  8) |
769               ( up0[column - 1] <<  7) |
770               (xup1[column    ] <<  6) |
771               (xup0[column - 1] <<  5) |
772               (xup0[column    ] <<  4) |
773               (xup0[column + 1] <<  3) |
774               (xdn1[column - 1] <<  2) |
775               (xdn1[column    ] <<  1) |
776               (xdn1[column + 1] <<  0) );
777 }
778 
779 inline int
shift_cross_context(const int context,const int n,unsigned char const * const up1,unsigned char const * const up0,unsigned char const * const xup1,unsigned char const * const xup0,unsigned char const * const xdn1,const int column)780 JB2Dict::JB2Codec::shift_cross_context( const int context, const int n,
781                      unsigned char const * const up1,
782                      unsigned char const * const up0,
783                      unsigned char const * const xup1,
784                      unsigned char const * const xup0,
785                      unsigned char const * const xdn1,
786                      const int column )
787 {
788   return ( ((context<<1) & 0x636)  |
789               ( up1[column + 1] << 8) |
790               (xup1[column    ] << 6) |
791               (xup0[column + 1] << 3) |
792               (xdn1[column + 1] << 0) |
793               (n << 7)             );
794 }
795 
796 // ---------- THE END
797 
798 #ifdef HAVE_NAMESPACES
799 }
800 # ifndef NOT_USING_DJVU_NAMESPACE
801 using namespace DJVU;
802 # endif
803 #endif
804 #endif
805 
806