1 //C- -*- C++ -*-
2 //C- -------------------------------------------------------------------
3 //C- DjVuLibre-3.5
4 //C- Copyright (c) 2002 Leon Bottou and Yann Le Cun.
5 //C- Copyright (c) 2001 AT&T
6 //C-
7 //C- This software is subject to, and may be distributed under, the
8 //C- GNU General Public License, either Version 2 of the license,
9 //C- or (at your option) any later version. The license should have
10 //C- accompanied the software or you may obtain a copy of the license
11 //C- from the Free Software Foundation at http://www.fsf.org .
12 //C-
13 //C- This program is distributed in the hope that it will be useful,
14 //C- but WITHOUT ANY WARRANTY; without even the implied warranty of
15 //C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 //C- GNU General Public License for more details.
17 //C-
18 //C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library from
19 //C- Lizardtech Software. Lizardtech Software has authorized us to
20 //C- replace the original DjVu(r) Reference Library notice by the following
21 //C- text (see doc/lizard2002.djvu and doc/lizardtech2007.djvu):
22 //C-
23 //C- ------------------------------------------------------------------
24 //C- | DjVu (r) Reference Library (v. 3.5)
25 //C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved.
26 //C- | The DjVu Reference Library is protected by U.S. Pat. No.
27 //C- | 6,058,214 and patents pending.
28 //C- |
29 //C- | This software is subject to, and may be distributed under, the
30 //C- | GNU General Public License, either Version 2 of the license,
31 //C- | or (at your option) any later version. The license should have
32 //C- | accompanied the software or you may obtain a copy of the license
33 //C- | from the Free Software Foundation at http://www.fsf.org .
34 //C- |
35 //C- | The computer code originally released by LizardTech under this
36 //C- | license and unmodified by other parties is deemed "the LIZARDTECH
37 //C- | ORIGINAL CODE." Subject to any third party intellectual property
38 //C- | claims, LizardTech grants recipient a worldwide, royalty-free,
39 //C- | non-exclusive license to make, use, sell, or otherwise dispose of
40 //C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the
41 //C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU
42 //C- | General Public License. This grant only confers the right to
43 //C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to
44 //C- | the extent such infringement is reasonably necessary to enable
45 //C- | recipient to make, have made, practice, sell, or otherwise dispose
46 //C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to
47 //C- | any greater extent that may be necessary to utilize further
48 //C- | modifications or combinations.
49 //C- |
50 //C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY
51 //C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
52 //C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF
53 //C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
54 //C- +------------------------------------------------------------------
55
56 #ifndef _JB2IMAGE_H
57 #define _JB2IMAGE_H
58 #ifdef HAVE_CONFIG_H
59 #include "config.h"
60 #endif
61 #if NEED_GNUG_PRAGMAS
62 # pragma interface
63 #endif
64
65 /** @name JB2Image.h
66
67 Files #"JB2Image.h"# and #"JB2Image.cpp"# address the compression of
68 bilevel images using the JB2 soft pattern matching scheme. These files
    provide the complete decoder and the encoder back-end. The JB2 scheme is
70 optimized for images containing a large number of self-similar small
71 components such as characters. Typical text images can be compressed into
72 files 3 to 5 times smaller than with G4/MMR and 2 to 4 times smaller than
73 with JBIG1.
74
75 {\bf JB2 and JBIG2} --- JB2 has strong similarities with the forthcoming
76 JBIG2 standard developed by the "ISO/IEC JTC1 SC29 Working Group 1" which
77 is responsible for both the JPEG and JBIG standards. This is hardly
78 surprising since JB2 was our own proposal for the JBIG2 standard
79 and remained the only proposal for years. The full JBIG2 standard however
    is significantly more complex and slightly less efficient than JB2 because
81 it addresses a broader range of applications. Full JBIG2 compliance may
82 be implemented in the future.
83
84 {\bf JB2 Images} --- Class \Ref{JB2Image} is the central data structure
85 implemented here. A #JB2Image# is composed of an array of shapes
86 and an array of blits. Each shape contains a small bitmap representing an
87 elementary blob of ink, such as a character or a segment of line art.
88 Each blit instructs the decoder to render a particular shape at a
89 specified position in the image. Some compression is already achieved
90 because several blits can refer to the same shape. A shape can also
91 contain a pointer to a parent shape. Additional compression is achieved
92 when both shapes are similar because each shape is encoded using the
93 parent shape as a model. A #"O"# shape for instance could be a parent for
94 both a #"C"# shape and a #"Q"# shape.
95
96 {\bf JB2 Dictionary} --- Class \Ref{JB2Dict} is a peculiar kind of
97 JB2Image which only contains an array of shapes. These shapes can be
98 referenced from another JB2Dict/JB2Image. This is arranged by setting the
99 ``inherited dictionary'' of a JB2Dict/JB2Image using function
100 \Ref{JB2Dict::set_inherited_dict}. Several JB2Images can use shapes from a
101 same JB2Dict encoded separately. This is how several pages of a same
102 document can share information.
103
104 {\bf Decoding JB2 data} --- The first step for decoding JB2 data consists of
105 creating an empty #JB2Image# object. Function \Ref{JB2Image::decode} then
106 reads the data and populates the #JB2Image# with the shapes and the blits.
107 Function \Ref{JB2Image::get_bitmap} finally produces an anti-aliased image.
108
    {\bf Encoding JB2 data} --- The first step for encoding JB2 data also
110 consists of creating an empty #JB2Image# object. You must then use
111 functions \Ref{JB2Image::add_shape} and \Ref{JB2Image::add_blit} to
112 populate the #JB2Image# object. Function \Ref{JB2Image::encode} finally
113 produces the JB2 data. Function #encode# sequentially encodes the blits
114 and the necessary shapes. The compression ratio depends on several
115 factors:
116 \begin{itemize}
117 \item Blits should reuse shapes as often as possible.
118 \item Blits should be sorted in reading order because this facilitates
119 the prediction of the blit coordinates.
120 \item Shapes should be sorted according to the order of first appearance
121 in the sequence of blits because this facilitates the prediction of the
122 shape indices.
123 \item Shapes should be compared to all previous shapes in the shape array.
124 The shape parent pointer should be set to a suitable parent shape if
125 such a parent shape exists. The parent shape should have almost the
126 same size and the same pixels.
127 \end{itemize}
128 All this is quite easy to achieve in the case of an electronically
129 produced document such as a DVI file or a PS file: we know what the
130 characters are and where they are located. If you only have a scanned
131 image however you must first locate the characters (connected component
132 analysis) and cut the remaining pieces of ink into smaller blobs.
133 Ordering the blits and matching the shapes is then an essentially
    heuristic process. Although the quality of the heuristics substantially
    affects the file size, misordering blits or mismatching shapes never
    affects the quality of the image. The last refinement consists in
137 smoothing the shapes in order to reduce the noise and maximize the
138 similarities between shapes.
139
140 {\bf JB2 extensions} --- Two extensions of the JB2
141 encoding format have been introduced with DjVu files version 21. The first
142 extension addresses the shared shape dictionaries. The second extension
143 bounds the number of probability contexts used for coding numbers.
144 Both extensions maintain backward compatibility with JB2 as
145 described in the ICFDD proposal. A more complete discussion
146 can be found in section \Ref{JB2 extensions for version 21.}.
147
148 {\bf References}
149 \begin{itemize}
150 \item Paul G. Howard : {\em Text image compression using soft
151 pattern matching}, Computer Journal, volume 40:2/3, 1997.
152 \item JBIG1 : \URL{http://www.jpeg.org/public/jbighomepage.htm}.
153 \item JBIG2 draft : \URL{http://www.jpeg.org/public/jbigpt2.htm}.
154 \item ICFDD Draft Proposed American National Standard, 1999-08-26.
155 \end{itemize}
156
157 @memo
158 Coding bilevel images with JB2.
159 @author
160 Paul Howard <pgh@research.att.com> -- JB2 design\\
161 L\'eon Bottou <leonb@research.att.com> -- this implementation
162
163 // From: Leon Bottou, 1/31/2002
164 // Lizardtech has split the corresponding cpp file into a decoder and an encoder.
165 // Only superficial changes. The meat is mine.
166
167 */
168 //@{
169
170
171 #include "GString.h"
172 #include "ZPCodec.h"
173
174
175 #ifdef HAVE_NAMESPACES
176 namespace DJVU {
177 # ifdef NOT_DEFINED // Just to fool emacs c++ mode
178 }
179 #endif
180 #endif
181
// Forward declarations of the classes that this header only manipulates
// through pointers, references or smart pointers.
class ByteStream;
class GBitmap;
class GRect;
class JB2Dict;
class JB2Image;
187
188 /** Blit data structure. A #JB2Image# contains an array of #JB2Blit# data
189 structures. Each array entry instructs the decoder to render a particular
190 shape at a particular location. Members #left# and #bottom# specify the
191 coordinates of the bottom left corner of the shape bitmap. All
192 coordinates are relative to the bottom left corner of the image. Member
193 #shapeno# is the subscript of the shape to be rendered. */
194
195 class DJVUAPI JB2Blit {
196 public:
197 /** Horizontal coordinate of the blit. */
198 unsigned short left;
199 /** Vertical coordinate of the blit. */
200 unsigned short bottom;
201 /** Index of the shape to blit. */
202 unsigned int shapeno;
203 };
204
205
206 /** Shape data structure. A #JB2Image# contains an array of #JB2Shape# data
207 structures. Each array entry represents an elementary blob of ink such as
208 a character or a segment of line art. Member #bits# points to a bilevel
209 image representing the shape pixels. Member #parent# is the subscript of
210 the parent shape. */
211
212 class DJVUAPI JB2Shape
213 {
214 public:
215 /** Subscript of the parent shape. The parent shape must always be located
216 before the current shape in the shape array. A negative value indicates
217 that this shape has no parent. Any negative values smaller than #-1#
218 further indicates that this shape does not look like a character. This
219 is used to enable a few internal optimizations. This information is
220 saved into the JB2 file, but the actual value of the #parent# variable
221 is not. */
222 int parent;
223 /** Bilevel image of the shape pixels. This must be a pointer to a bilevel
224 #GBitmap# image. This pointer can also be null. The encoder will just
225 silently discard all blits referring to a shape containing a null
226 bitmap. */
227 GP<GBitmap> bits;
228 /** Private user data. This long word is provided as a convenience for users
229 of the JB2Image data structures. Neither the rendering functions nor
230 the coding functions ever access this value. */
231 long userdata;
232 };
233
234
235
236 /** JB2 Dictionary callback.
237 The decoding function call this callback function when they discover that
238 the current JB2Image or JB2Dict needs a pre-existing shape dictionary.
239 The callback function must return a pointer to the dictionary or NULL
240 if none is found. */
241
242 typedef GP<JB2Dict> JB2DecoderCallback ( void* );
243
244
245 /** Dictionary of JB2 shapes. */
246
247 class DJVUAPI JB2Dict : public GPEnabled
248 {
249 protected:
250 JB2Dict(void);
251 public:
252 class JB2Codec;
253
254 // CONSTRUCTION
255 /** Default creator. Constructs an empty #JB2Dict# object. You can then
256 call the decoding function #decode#. You can also manually set the
257 image size using #add_shape#. */
258 static GP<JB2Dict> create(void);
259
260 // INITIALIZATION
261 /** Resets the #JB2Image# object. This function reinitializes both the shape
262 and the blit arrays. All allocated memory is freed. */
263 void init(void);
264
265 // INHERITED
266 /** Returns the inherited dictionary. */
267 GP<JB2Dict> get_inherited_dict(void) const;
268 /** Returns the number of inherited shapes. */
269 int get_inherited_shape_count(void) const;
270 /** Sets the inherited dictionary. */
271 void set_inherited_dict(const GP<JB2Dict> &dict);
272
273 // ACCESSING THE SHAPE LIBRARY
274 /** Returns the total number of shapes.
275 Shape indices range from #0# to #get_shape_count()-1#. */
276 int get_shape_count(void) const;
277 /** Returns a pointer to shape #shapeno#.
278 The returned pointer directly points into the shape array.
279 This pointer can be used for reading or writing the shape data. */
280 JB2Shape &get_shape(const int shapeno);
281 /** Returns a constant pointer to shape #shapeno#.
282 The returned pointer directly points into the shape array.
283 This pointer can only be used for reading the shape data. */
284 const JB2Shape &get_shape(const int shapeno) const;
285 /** Appends a shape to the shape array. This function appends a copy of
286 shape #shape# to the shape array and returns the subscript of the new
287 shape. The subscript of the parent shape #shape.parent# must
288 actually designate an already existing shape. */
289 int add_shape(const JB2Shape &shape);
290
291 // MEMORY OPTIMIZATION
292 /** Compresses all shape bitmaps. This function reduces the memory required
293 by the #JB2Image# by calling \Ref{GBitmap::compress} on all shapes
294 bitmaps. This function is best called after decoding a #JB2Image#,
295 because function \Ref{get_bitmap} can directly use the compressed
296 bitmaps. */
297 void compress(void);
298 /** Returns the total memory used by the JB2Image.
299 The returned value is expressed in bytes. */
300 unsigned int get_memory_usage(void) const;
301
302 // CODING
303 /** Encodes the JB2Dict into ByteStream #bs#.
304 This function generates the JB2 data stream without any header. */
305 void encode(const GP<ByteStream> &gbs) const;
306 /** Decodes JB2 data from ByteStream #bs#. This function decodes the image
307 size and populates the shape and blit arrays. The callback function
308 #cb# is called when the decoder determines that the ByteStream data
309 requires a shape dictionary which has not been set with
310 \Ref{JB2Dict::set_inherited_dict}. The callback receives argument #arg#
311 and must return a suitable dictionary which will be installed as the
312 inherited dictionary. The callback should return null if no such
313 dictionary is found. */
314 void decode(const GP<ByteStream> &gbs, JB2DecoderCallback *cb=0, void *arg=0);
315
316
317 public:
318 /** Comment string coded by JB2 file. */
319 GUTF8String comment;
320
321
322 private:
323 friend class JB2Codec;
324 int inherited_shapes;
325 GP<JB2Dict> inherited_dict;
326 GArray<JB2Shape> shapes;
327
328 struct LibRect {
329 int top,left,right,bottom;
330 void compute_bounding_box(const GBitmap &cbm);
331 };
332 GTArray<LibRect> boxes;
333 void get_bounding_box(int shapeno, LibRect &dest);
334 };
335
336 /** Main JB2 data structure. Each #JB2Image# consists of an array of shapes
337 and an array of blits. These arrays can be populated by hand using
338 functions \Ref{add_shape} and \Ref{add_blit}, or by decoding JB2 data
339 using function \Ref{decode}. You can then use function \Ref{get_bitmap}
340 to render anti-aliased images, or use function \Ref{encode} to generate
341 JB2 data. */
342
343 class DJVUAPI JB2Image : public JB2Dict
344 {
345 protected:
346 JB2Image(void);
347 public:
348
349 /** Creates an empty #JB2Image# object. You can then
350 call the decoding function #decode#. You can also manually set the
351 image size using #set_dimension# and populate the shape and blit arrays
352 using #add_shape# and #add_blit#. */
create(void)353 static GP<JB2Image> create(void) { return new JB2Image(); }
354
355 // INITIALIZATION
356 /** Resets the #JB2Image# object. This function reinitializes both the shape
357 and the blit arrays. All allocated memory is freed. */
358 void init(void);
359
360 // DIMENSION
361 /** Returns the width of the image.
362 This is the width value previously set with #set_dimension#. */
363 int get_width(void) const;
364 /** Returns the height of the image.
365 This is the height value previously set with #set_dimension#. */
366 int get_height(void) const;
367 /** Sets the size of the JB2Image.
368 This function can be called at any time.
369 The corresponding #width# and the #height# are stored
370 in the JB2 file. */
371 void set_dimension(int width, int height);
372
373 // RENDERING
374 /** Renders an anti-aliased gray level image. This function renders the
375 JB2Image as a bilevel or gray level image. Argument #subsample#
376 specifies the desired subsampling ratio in range #1# to #15#. The
377 returned image uses #1+subsample^2# gray levels for representing
378 anti-aliased edges. Argument #align# specified the alignment of the
379 rows of the returned images. Setting #align# to #4#, for instance, will
380 adjust the bitmap border in order to make sure that each row of the
381 returned image starts on a word (four byte) boundary. */
382 GP<GBitmap> get_bitmap(int subsample = 1, int align = 1) const;
383 /** Renders an anti-aliased gray level sub-image. This function renders a
384 segment of the JB2Image as a bilevel or gray level image. Conceptually,
385 this function first renders the full JB2Image with subsampling ratio
386 #subsample# and then extracts rectangle #rect# in the subsampled image.
387 Both operations of course are efficiently performed simultaneously.
388 Argument #align# specified the alignment of the rows of the returned
389 images, as explained above. Argument #dispy# should remain null. */
390 GP<GBitmap> get_bitmap(const GRect &rect, int subsample=1, int align=1, int dispy=0) const;
391
392 // ACCESSING THE BLIT LIBRARY
393 /** Returns the total number of blits.
394 Blit indices range from #0# to #get_blit_count(void)-1#. */
395 int get_blit_count(void) const;
396 /** Returns a pointer to blit #blitno#.
397 The returned pointer directly points into the blit array.
398 This pointer can be used for reading or writing the blit data. */
399 JB2Blit *get_blit(int blitno);
400 /** Returns a constant pointer to blit #blitno#.
401 The returned pointer directly points into the shape array.
402 This pointer can only be used for reading the shape data. */
403 const JB2Blit *get_blit(int blitno) const;
404 /** Appends a blit to the blit array. This function appends a copy of blit
405 #blit# to the blit array and returns the subscript of the new blit. The
406 shape subscript #blit.shapeno# must actually designate an already
407 existing shape. */
408 int add_blit(const JB2Blit &blit);
409
410 // MEMORY OPTIMIZATION
411 /** Returns the total memory used by the JB2Image.
412 The returned value is expressed in bytes. */
413 unsigned int get_memory_usage(void) const;
414
415 // CODING
416 /** Encodes the JB2Image into ByteStream #bs#.
417 This function generates the JB2 data stream without any header. */
418 void encode(const GP<ByteStream> &gbs) const;
419 /** Decodes JB2 data from ByteStream #bs#. This function decodes the image
420 size and populates the shape and blit arrays. The callback function
421 #cb# is called when the decoder determines that the ByteStream data
422 requires a shape dictionary which has not been set with
423 \Ref{JB2Dict::set_inherited_dict}. The callback receives argument #arg#
424 and must return a suitable dictionary which will be installed as the
425 inherited dictionary. The callback should return null if no such
426 dictionary is found. */
427 void decode(const GP<ByteStream> &gbs, JB2DecoderCallback *cb=0, void *arg=0);
428
429 private:
430 // Implementation
431 int width;
432 int height;
433 GTArray<JB2Blit> blits;
434 public:
435 /** Reproduces a old bug. Setting this flag may be necessary for accurately
436 decoding DjVu files with version smaller than #18#. The default value
437 is of couse #false#. */
438 bool reproduce_old_bug;
439 };
440
441
442
443 // JB2DICT INLINE FUNCTIONS
444
445 inline int
get_shape_count(void)446 JB2Dict::get_shape_count(void) const
447 {
448 return inherited_shapes + shapes.size();
449 }
450
451 inline int
get_inherited_shape_count(void)452 JB2Dict::get_inherited_shape_count(void) const
453 {
454 return inherited_shapes;
455 }
456
457 inline GP<JB2Dict>
get_inherited_dict(void)458 JB2Dict::get_inherited_dict(void) const
459 {
460 return inherited_dict;
461 }
462
463 // JB2IMAGE INLINE FUNCTIONS
464
465 inline int
get_width(void)466 JB2Image::get_width(void) const
467 {
468 return width;
469 }
470
471 inline int
get_height(void)472 JB2Image::get_height(void) const
473 {
474 return height;
475 }
476
477
478 inline int
get_blit_count(void)479 JB2Image::get_blit_count(void) const
480 {
481 return blits.size();
482 }
483
484
485 inline JB2Blit *
get_blit(int blitno)486 JB2Image::get_blit(int blitno)
487 {
488 return & blits[blitno];
489 }
490
491 inline const JB2Blit *
get_blit(int blitno)492 JB2Image::get_blit(int blitno) const
493 {
494 return & blits[blitno];
495 }
496
497
498
499
500
501 /** @name JB2 extensions for version 21.
502
503 Two extensions of the JB2 encoding format have been introduced
504 with DjVu files version 21. Both extensions maintain significant
    backward compatibility with previous versions of the JB2 format.
    These extensions are described below by reference to the ICFDD
    proposal dated August 1999. Both extensions make use of the unused
508 record type value #9# (cf. ICFDD page 24) which has been renamed
509 #REQUIRED_DICT_OR_RESET#.
510
511 {\bf Shared Shape Dictionaries} --- This extension provides
512 support for sharing symbol definitions between the pages of a
513 document. To achieve this objective, the JB2 image data chunk
514 must be able to address symbols defined elsewhere by a JB2
515 dictionary data chunk shared by all the pages of a document.
516
517 The arithmetically encoded JB2 image data logically consist of a
518 sequence of records. The decoder processes these records in
519 sequence and maintains a library of symbols which can be addressed
520 by the following records. The first record usually is a ``Start
521 Of Image'' record describing the size of the image.
522
523 Starting with version 21, a #REQUIRED_DICT_OR_RESET# (9) record
524 type can appear {\em before} the #START_OF_DATA# (0) record. The
525 record type field is followed by a single number arithmetically
526 encoded (cf. ICFDD page 26) using a sixteenth context (cf. ICFDD
527 page 25). This record appears when the JB2 data chunk requires
528 symbols encoded in a separate JB2 dictionary data chunk. The
529 number (the {\bf dictionary size}) indicates how many symbols
530 should have been defined by the JB2 dictionary data chunk. The
531 decoder should simply load these symbols in the symbol library and
532 proceed as usual. New symbols potentially defined by the
533 subsequent JB2 image data records will therefore be numbered with
    integers greater than or equal to the dictionary size.
535
536 The JB2 dictionary data format is a pure subset of the JB2 image
537 data format. The #START_OF_DATA# (0) record always specifies an
538 image width of zero and an image height of zero. The only allowed
539 record types are those defining library symbols only
540 (#NEW_SYMBOL_LIBRARY_ONLY# (2) and #MATCHED_REFINE_LIBRARY_ONLY#
541 (5) cf. ICFDD page 24) followed by a final #END_OF_DATA# (11)
542 record.
543
    The JB2 dictionary data is usually located in a {\bf Djbz} chunk.
545 Each page {\bf FORM:DJVU} may directly contain a {\bf Djbz} chunk,
546 or may indirectly point to such a chunk using an {\bf INCL} chunk
547 (cf. \Ref{Multipage DjVu documents.}).
548
549
550 {\bf Numcoder Reset} --- This extension addresses a problem for
551 hardware implementations. The encoding of numbers (cf. ICFDD page
552 26) potentially uses an unbounded number of binary coding
553 contexts. These contexts are normally allocated when they are used
554 for the first time (cf. ICFDD informative note, page 27).
555
556 Starting with version 21, a #REQUIRED_DICT_OR_RESET# (9) record
557 type can appear {\em after} the #START_OF_DATA# (0) record. The
558 decoder should proceed with the next record after {\em clearing
559 all binary contexts used for coding numbers}. This operation
560 implies that all binary contexts previously allocated for coding
561 numbers can be deallocated.
562
563 Starting with version 21, the JB2 encoder should insert a
564 #REQUIRED_DICT_OR_RESET# record type whenever the number of these
565 allocated binary contexts exceeds #20000#. Only very large
566 documents ever reach such a large number of allocated binary
    contexts (e.g. large maps). Hardware implementations however can
568 benefit greatly from a hard bound on the total number of binary
569 coding contexts. Old JB2 decoders will treat this record type as
570 an #END_OF_DATA# record and cleanly stop decoding (cf. ICFDD page
571 30, Image refinement data).
572
573
574 {\bf References} ---
575 \begin{itemize}
576 \item ICFDD Draft Proposed American National Standard, 1999-08-26.
577 \item DjVu Specification, \URL{http://www.lizardtech.com/djvu/sci/djvuspec}.
578 \end{itemize}
579
580 @memo Extensions to the JB2 format introduced in version 21. */
581
582 //@}
583
584 ////////////////////////////////////////
585 //// CLASS JB2CODEC: DECLARATION
586 ////////////////////////////////////////
587
588 // This class is accessed via the encode and decode
589 // functions of class JB2Image
590
591
592 //**** Class JB2Codec
// This class is the base class for both the JB2 coder and decoder.
// A JB2Codec contains all the contextual information for encoding/decoding
// a JB2Image.
596
597 class JB2Dict::JB2Codec
598 {
599 public:
600 class Decode;
601 class Encode;
602 typedef unsigned int NumContext;
603 virtual ~JB2Codec();
604 protected:
605 // Constructors
606 JB2Codec(const bool xencoding=false);
607 // Forbidden assignment
608 JB2Codec(const JB2Codec &ref);
609 JB2Codec& operator=(const JB2Codec &ref);
610
611 int CodeNum(int lo, int hi, NumContext *pctx,int v);
612 void reset_numcoder(void);
613 inline void code_eventual_lossless_refinement(void);
614 void init_library(JB2Dict &jim);
615 int add_library(const int shapeno, JB2Shape &jshp);
616 void code_relative_location(JB2Blit *jblt, int rows, int columns);
617 void code_bitmap_directly (GBitmap &bm);
618 void code_bitmap_by_cross_coding (GBitmap &bm, GP<GBitmap> &cbm, const int libno);
619 void code_record(int &rectype, const GP<JB2Dict> &jim, JB2Shape *jshp);
620 void code_record(int &rectype, const GP<JB2Image> &jim, JB2Shape *jshp, JB2Blit *jblt);
621 static void compute_bounding_box(GBitmap &cbm, LibRect &lrect);
622 static int get_direct_context( unsigned char const * const up2,
623 unsigned char const * const up1, unsigned char const * const up0,
624 const int column);
625 static int shift_direct_context ( const int context,
626 const int next, unsigned char const * const up2,
627 unsigned char const * const up1, unsigned char const * const up0,
628 const int column);
629 static int get_cross_context( unsigned char const * const up1,
630 unsigned char const * const up0, unsigned char const * const xup1,
631 unsigned char const * const xup0, unsigned char const * const xdn1,
632 const int column );
633 static int shift_cross_context( const int context,
634 const int n, unsigned char const * const up1,
635 unsigned char const * const up0, unsigned char const * const xup1,
636 unsigned char const * const xup0, unsigned char const * const xdn1,
637 const int column );
638
639 virtual bool CodeBit(const bool bit, BitContext &ctx) = 0;
640 virtual void code_comment(GUTF8String &comment) = 0;
641 virtual void code_record_type(int &rectype) = 0;
642 virtual int code_match_index(int &index, JB2Dict &jim)=0;
643 virtual void code_inherited_shape_count(JB2Dict &jim)=0;
644 virtual void code_image_size(JB2Dict &jim);
645 virtual void code_image_size(JB2Image &jim);
646 virtual void code_absolute_location(JB2Blit *jblt, int rows, int columns)=0;
647 virtual void code_absolute_mark_size(GBitmap &bm, int border=0) = 0;
648 virtual void code_relative_mark_size(GBitmap &bm, int cw, int ch, int border=0) = 0;
649 virtual void code_bitmap_directly(GBitmap &bm,const int dw, int dy,
650 unsigned char *up2, unsigned char *up1, unsigned char *up0 )=0;
651 virtual void code_bitmap_by_cross_coding (GBitmap &bm, GBitmap &cbm,
652 const int xd2c, const int dw, int dy, int cy,
653 unsigned char *up1, unsigned char *up0, unsigned char *xup1,
654 unsigned char *xup0, unsigned char *xdn1 )=0;
655 // Code records
656 virtual int get_diff(const int x_diff,NumContext &rel_loc) = 0;
657
658 private:
659 bool encoding;
660
661 protected:
662 // NumCoder
663 int cur_ncell;
664 BitContext *bitcells;
665 GPBuffer<BitContext> gbitcells;
666 NumContext *leftcell;
667 GPBuffer<NumContext> gleftcell;
668 NumContext *rightcell;
669 GPBuffer<NumContext> grightcell;
670 // Info
671 bool refinementp;
672 char gotstartrecordp;
673 // Code comment
674 NumContext dist_comment_byte;
675 NumContext dist_comment_length;
676 // Code values
677 NumContext dist_record_type;
678 NumContext dist_match_index;
679 BitContext dist_refinement_flag;
680 // Library
681 GTArray<int> shape2lib;
682 GTArray<int> lib2shape;
683 GTArray<LibRect> libinfo;
684 // Code pairs
685 NumContext abs_loc_x;
686 NumContext abs_loc_y;
687 NumContext abs_size_x;
688 NumContext abs_size_y;
689 NumContext image_size_dist;
690 NumContext inherited_shape_count_dist;
691 BitContext offset_type_dist;
692 NumContext rel_loc_x_current;
693 NumContext rel_loc_x_last;
694 NumContext rel_loc_y_current;
695 NumContext rel_loc_y_last;
696 NumContext rel_size_x;
697 NumContext rel_size_y;
698 int last_bottom;
699 int last_left;
700 int last_right;
701 int last_row_bottom;
702 int last_row_left;
703 int image_columns;
704 int image_rows;
705 int short_list[3];
706 int short_list_pos;
707 inline void fill_short_list(const int v);
708 int update_short_list(const int v);
709 // Code bitmaps
710 BitContext bitdist[1024];
711 BitContext cbitdist[2048];
712 };
713
714 inline void
code_eventual_lossless_refinement(void)715 JB2Dict::JB2Codec::code_eventual_lossless_refinement(void)
716 {
717 refinementp=CodeBit(refinementp, dist_refinement_flag);
718 }
719
720 inline void
fill_short_list(const int v)721 JB2Dict::JB2Codec::fill_short_list(const int v)
722 {
723 short_list[0] = short_list[1] = short_list[2] = v;
724 short_list_pos = 0;
725 }
726
727 inline int
get_direct_context(unsigned char const * const up2,unsigned char const * const up1,unsigned char const * const up0,const int column)728 JB2Dict::JB2Codec::get_direct_context( unsigned char const * const up2,
729 unsigned char const * const up1,
730 unsigned char const * const up0,
731 const int column)
732 {
733 return ( (up2[column - 1] << 9) |
734 (up2[column ] << 8) |
735 (up2[column + 1] << 7) |
736 (up1[column - 2] << 6) |
737 (up1[column - 1] << 5) |
738 (up1[column ] << 4) |
739 (up1[column + 1] << 3) |
740 (up1[column + 2] << 2) |
741 (up0[column - 2] << 1) |
742 (up0[column - 1] << 0) );
743 }
744
745 inline int
shift_direct_context(const int context,const int next,unsigned char const * const up2,unsigned char const * const up1,unsigned char const * const up0,const int column)746 JB2Dict::JB2Codec::shift_direct_context(const int context, const int next,
747 unsigned char const * const up2,
748 unsigned char const * const up1,
749 unsigned char const * const up0,
750 const int column)
751 {
752 return ( ((context << 1) & 0x37a) |
753 (up1[column + 2] << 2) |
754 (up2[column + 1] << 7) |
755 (next << 0) );
756 }
757
758 inline int
get_cross_context(unsigned char const * const up1,unsigned char const * const up0,unsigned char const * const xup1,unsigned char const * const xup0,unsigned char const * const xdn1,const int column)759 JB2Dict::JB2Codec::get_cross_context( unsigned char const * const up1,
760 unsigned char const * const up0,
761 unsigned char const * const xup1,
762 unsigned char const * const xup0,
763 unsigned char const * const xdn1,
764 const int column )
765 {
766 return ( ( up1[column - 1] << 10) |
767 ( up1[column ] << 9) |
768 ( up1[column + 1] << 8) |
769 ( up0[column - 1] << 7) |
770 (xup1[column ] << 6) |
771 (xup0[column - 1] << 5) |
772 (xup0[column ] << 4) |
773 (xup0[column + 1] << 3) |
774 (xdn1[column - 1] << 2) |
775 (xdn1[column ] << 1) |
776 (xdn1[column + 1] << 0) );
777 }
778
779 inline int
shift_cross_context(const int context,const int n,unsigned char const * const up1,unsigned char const * const up0,unsigned char const * const xup1,unsigned char const * const xup0,unsigned char const * const xdn1,const int column)780 JB2Dict::JB2Codec::shift_cross_context( const int context, const int n,
781 unsigned char const * const up1,
782 unsigned char const * const up0,
783 unsigned char const * const xup1,
784 unsigned char const * const xup0,
785 unsigned char const * const xdn1,
786 const int column )
787 {
788 return ( ((context<<1) & 0x636) |
789 ( up1[column + 1] << 8) |
790 (xup1[column ] << 6) |
791 (xup0[column + 1] << 3) |
792 (xdn1[column + 1] << 0) |
793 (n << 7) );
794 }
795
796 // ---------- THE END
797
798 #ifdef HAVE_NAMESPACES
799 }
800 # ifndef NOT_USING_DJVU_NAMESPACE
801 using namespace DJVU;
802 # endif
803 #endif
804 #endif
805
806