1 /*
2  *  Arithmetic encoder and decoder of the portable JBIG
3  *  compression library
4  *
5  *  Markus Kuhn -- http://www.cl.cam.ac.uk/~mgk25/jbigkit/
6  *
7  *  This module implements a portable standard C arithmetic encoder
8  *  and decoder used by the JBIG lossless bi-level image compression
9  *  algorithm as specified in International Standard ISO 11544:1993
10  *  and ITU-T Recommendation T.82.
11  *
12  *  This program is free software; you can redistribute it and/or modify
13  *  it under the terms of the GNU General Public License as published by
14  *  the Free Software Foundation; either version 2 of the License, or
15  *  (at your option) any later version.
16  *
17  *  This program is distributed in the hope that it will be useful,
18  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
19  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20  *  GNU General Public License for more details.
21  *
22  *  You should have received a copy of the GNU General Public License
23  *  along with this program; if not, write to the Free Software
24  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25  *
26  *  If you want to use this program under different license conditions,
27  *  then contact the author for an arrangement.
28  */
29 
30 #include <assert.h>
31 #include "jbig_ar.h"
32 
33 /*
34  *  Probability estimation tables for the arithmetic encoder/decoder
35  *  given by ITU T.82 Table 24.
36  */
37 
38 static short lsztab[113] = {
39   0x5a1d, 0x2586, 0x1114, 0x080b, 0x03d8, 0x01da, 0x00e5, 0x006f,
40   0x0036, 0x001a, 0x000d, 0x0006, 0x0003, 0x0001, 0x5a7f, 0x3f25,
41   0x2cf2, 0x207c, 0x17b9, 0x1182, 0x0cef, 0x09a1, 0x072f, 0x055c,
42   0x0406, 0x0303, 0x0240, 0x01b1, 0x0144, 0x00f5, 0x00b7, 0x008a,
43   0x0068, 0x004e, 0x003b, 0x002c, 0x5ae1, 0x484c, 0x3a0d, 0x2ef1,
44   0x261f, 0x1f33, 0x19a8, 0x1518, 0x1177, 0x0e74, 0x0bfb, 0x09f8,
45   0x0861, 0x0706, 0x05cd, 0x04de, 0x040f, 0x0363, 0x02d4, 0x025c,
46   0x01f8, 0x01a4, 0x0160, 0x0125, 0x00f6, 0x00cb, 0x00ab, 0x008f,
47   0x5b12, 0x4d04, 0x412c, 0x37d8, 0x2fe8, 0x293c, 0x2379, 0x1edf,
48   0x1aa9, 0x174e, 0x1424, 0x119c, 0x0f6b, 0x0d51, 0x0bb6, 0x0a40,
49   0x5832, 0x4d1c, 0x438e, 0x3bdd, 0x34ee, 0x2eae, 0x299a, 0x2516,
50   0x5570, 0x4ca9, 0x44d9, 0x3e22, 0x3824, 0x32b4, 0x2e17, 0x56a8,
51   0x4f46, 0x47e5, 0x41cf, 0x3c3d, 0x375e, 0x5231, 0x4c0f, 0x4639,
52   0x415e, 0x5627, 0x50e7, 0x4b85, 0x5597, 0x504f, 0x5a10, 0x5522,
53   0x59eb
54 };
55 
56 static unsigned char nmpstab[113] = {
57     1,   2,   3,   4,   5,   6,   7,   8,
58     9,  10,  11,  12,  13,  13,  15,  16,
59    17,  18,  19,  20,  21,  22,  23,  24,
60    25,  26,  27,  28,  29,  30,  31,  32,
61    33,  34,  35,   9,  37,  38,  39,  40,
62    41,  42,  43,  44,  45,  46,  47,  48,
63    49,  50,  51,  52,  53,  54,  55,  56,
64    57,  58,  59,  60,  61,  62,  63,  32,
65    65,  66,  67,  68,  69,  70,  71,  72,
66    73,  74,  75,  76,  77,  78,  79,  48,
67    81,  82,  83,  84,  85,  86,  87,  71,
68    89,  90,  91,  92,  93,  94,  86,  96,
69    97,  98,  99, 100,  93, 102, 103, 104,
70    99, 106, 107, 103, 109, 107, 111, 109,
71   111
72 };
73 
74 /*
75  * least significant 7 bits (mask 0x7f) of nlpstab[] contain NLPS value,
76  * most significant bit (mask 0x80) contains SWTCH bit
77  */
78 static unsigned char nlpstab[113] = {
79   129,  14,  16,  18,  20,  23,  25,  28,
80    30,  33,  35,   9,  10,  12, 143,  36,
81    38,  39,  40,  42,  43,  45,  46,  48,
82    49,  51,  52,  54,  56,  57,  59,  60,
83    62,  63,  32,  33, 165,  64,  65,  67,
84    68,  69,  70,  72,  73,  74,  75,  77,
85    78,  79,  48,  50,  50,  51,  52,  53,
86    54,  55,  56,  57,  58,  59,  61,  61,
87   193,  80,  81,  82,  83,  84,  86,  87,
88    87,  72,  72,  74,  74,  75,  77,  77,
89   208,  88,  89,  90,  91,  92,  93,  86,
90   216,  95,  96,  97,  99,  99,  93, 223,
91   101, 102, 103, 104,  99, 105, 106, 107,
92   103, 233, 108, 109, 110, 111, 238, 112,
93   240
94 };
95 
96 /*
97  * The next functions implement the arithmedic encoder and decoder
98  * required for JBIG. The same algorithm is also used in the arithmetic
99  * variant of JPEG.
100  */
101 
102 /* marker codes */
103 #define MARKER_STUFF    0x00
104 #define MARKER_ESC      0xff
105 
arith_encode_init(struct jbg_arenc_state * s,int reuse_st)106 void arith_encode_init(struct jbg_arenc_state *s, int reuse_st)
107 {
108   int i;
109 
110   if (!reuse_st)
111     for (i = 0; i < 4096; s->st[i++] = 0) ;
112   s->c = 0;
113   s->a = 0x10000L;
114   s->sc = 0;
115   s->ct = 11;
116   s->buffer = -1;    /* empty */
117 
118   return;
119 }
120 
121 
arith_encode_flush(struct jbg_arenc_state * s)122 void arith_encode_flush(struct jbg_arenc_state *s)
123 {
124   unsigned long temp;
125 
126   /* find the s->c in the coding interval with the largest
127    * number of trailing zero bits */
128   if ((temp = (s->a - 1 + s->c) & 0xffff0000L) < s->c)
129     s->c = temp + 0x8000;
130   else
131     s->c = temp;
132   /* send remaining bytes to output */
133   s->c <<= s->ct;
134   if (s->c & 0xf8000000L) {
135     /* one final overflow has to be handled */
136     if (s->buffer >= 0) {
137       s->byte_out(s->buffer + 1, s->file);
138       if (s->buffer + 1 == MARKER_ESC)
139 	s->byte_out(MARKER_STUFF, s->file);
140     }
141     /* output 0x00 bytes only when more non-0x00 will follow */
142     if (s->c & 0x7fff800L)
143       for (; s->sc; --s->sc)
144 	s->byte_out(0x00, s->file);
145   } else {
146     if (s->buffer >= 0)
147       s->byte_out(s->buffer, s->file);
148     /* T.82 figure 30 says buffer+1 for the above line! Typo? */
149     for (; s->sc; --s->sc) {
150       s->byte_out(0xff, s->file);
151       s->byte_out(MARKER_STUFF, s->file);
152     }
153   }
154   /* output final bytes only if they are not 0x00 */
155   if (s->c & 0x7fff800L) {
156     s->byte_out((s->c >> 19) & 0xff, s->file);
157     if (((s->c >> 19) & 0xff) == MARKER_ESC)
158       s->byte_out(MARKER_STUFF, s->file);
159     if (s->c & 0x7f800L) {
160       s->byte_out((s->c >> 11) & 0xff, s->file);
161       if (((s->c >> 11) & 0xff) == MARKER_ESC)
162 	s->byte_out(MARKER_STUFF, s->file);
163     }
164   }
165 
166   return;
167 }
168 
169 
arith_encode(struct jbg_arenc_state * s,int cx,int pix)170 void arith_encode(struct jbg_arenc_state *s, int cx, int pix)
171 {
172   register unsigned lsz, ss;
173   register unsigned char *st;
174   long temp;
175 
176   assert(cx >= 0 && cx < 4096);
177   st = s->st + cx;
178   ss = *st & 0x7f;
179   assert(ss < 113);
180   lsz = lsztab[ss];
181 
182 #if 0
183   fprintf(stderr, "pix = %d, cx = %d, mps = %d, st = %3d, lsz = 0x%04x, "
184 	  "a = 0x%05lx, c = 0x%08lx, ct = %2d, buf = 0x%02x\n",
185 	  pix, cx, !!(s->st[cx] & 0x80), ss, lsz, s->a, s->c, s->ct,
186 	  s->buffer);
187 #endif
188 
189   if (((pix << 7) ^ s->st[cx]) & 0x80) {
190     /* encode the less probable symbol */
191     if ((s->a -= lsz) >= lsz) {
192       /* If the interval size (lsz) for the less probable symbol (LPS)
193        * is larger than the interval size for the MPS, then exchange
194        * the two symbols for coding efficiency, otherwise code the LPS
195        * as usual: */
196       s->c += s->a;
197       s->a = lsz;
198     }
199     /* Check whether MPS/LPS exchange is necessary
200      * and chose next probability estimator status */
201     *st &= 0x80;
202     *st ^= nlpstab[ss];
203   } else {
204     /* encode the more probable symbol */
205     if ((s->a -= lsz) & 0xffff8000L)
206       return;   /* A >= 0x8000 -> ready, no renormalization required */
207     if (s->a < lsz) {
208       /* If the interval size (lsz) for the less probable symbol (LPS)
209        * is larger than the interval size for the MPS, then exchange
210        * the two symbols for coding efficiency: */
211       s->c += s->a;
212       s->a = lsz;
213     }
214     /* chose next probability estimator status */
215     *st &= 0x80;
216     *st |= nmpstab[ss];
217   }
218 
219   /* renormalization of coding interval */
220   do {
221     s->a <<= 1;
222     s->c <<= 1;
223     --s->ct;
224     if (s->ct == 0) {
225       /* another byte is ready for output */
226       temp = s->c >> 19;
227       if (temp & 0xffffff00L) {
228 	/* handle overflow over all buffered 0xff bytes */
229 	if (s->buffer >= 0) {
230 	  ++s->buffer;
231 	  s->byte_out(s->buffer, s->file);
232 	  if (s->buffer == MARKER_ESC)
233 	    s->byte_out(MARKER_STUFF, s->file);
234 	}
235 	for (; s->sc; --s->sc)
236 	  s->byte_out(0x00, s->file);
237 	s->buffer = temp & 0xff;  /* new output byte, might overflow later */
238 	assert(s->buffer != 0xff);
239 	/* can s->buffer really never become 0xff here? */
240       } else if (temp == 0xff) {
241 	/* buffer 0xff byte (which might overflow later) */
242 	++s->sc;
243       } else {
244 	/* output all buffered 0xff bytes, they will not overflow any more */
245 	if (s->buffer >= 0)
246 	  s->byte_out(s->buffer, s->file);
247 	for (; s->sc; --s->sc) {
248 	  s->byte_out(0xff, s->file);
249 	  s->byte_out(MARKER_STUFF, s->file);
250 	}
251 	s->buffer = temp;   /* buffer new output byte (can still overflow) */
252       }
253       s->c &= 0x7ffffL;
254       s->ct = 8;
255     }
256   } while (s->a < 0x8000);
257 
258   return;
259 }
260 
261 
arith_decode_init(struct jbg_ardec_state * s,int reuse_st)262 void arith_decode_init(struct jbg_ardec_state *s, int reuse_st)
263 {
264   int i;
265 
266   if (!reuse_st)
267     for (i = 0; i < 4096; s->st[i++] = 0) ;
268   s->c = 0;
269   s->a = 1;
270   s->ct = 0;
271   s->startup = 1;
272   s->nopadding = 0;
273   return;
274 }
275 
276 /*
277  * Decode and return one symbol from the provided PSCD byte stream
278  * that starts in s->pscd_ptr and ends in the byte before s->pscd_end.
279  * The context cx is a 12-bit integer in the range 0..4095. This
280  * function will advance s->pscd_ptr each time it has consumed all
281  * information from that PSCD byte.
282  *
283  * If a symbol has been decoded successfully, the return value will be
284  * 0 or 1 (depending on the symbol).
285  *
286  * If the decoder was not able to decode a symbol from the provided
287  * PSCD, then the return value will be -1, and two cases can be
288  * distinguished:
289  *
290  * s->pscd_ptr == s->pscd_end:
291  *
292  *   The decoder has used up all information in the provided PSCD
293  *   bytes. Further PSCD bytes have to be provided (via new values of
294  *   s->pscd_ptr and/or s->pscd_end) before another symbol can be
295  *   decoded.
296  *
297  * s->pscd_ptr == s->pscd_end - 1:
298  *
299  *   The decoder has used up all provided PSCD bytes except for the
300  *   very last byte, because that has the value 0xff. The decoder can
301  *   at this point not yet tell whether this 0xff belongs to a
302  *   MARKER_STUFF sequence or marks the end of the PSCD. Further PSCD
303  *   bytes have to be provided (via new values of s->pscd_ptr and/or
304  *   s->pscd_end), including the not yet processed 0xff byte, before
305  *   another symbol can be decoded successfully.
306  *
307  * If s->nopadding != 0, the decoder will return -2 when it reaches
308  * the first two bytes of the marker segment that follows (and
309  * terminates) the PSCD, but before decoding the first symbol that
310  * depends on a bit in the input data that could have been the result
311  * of zero padding, and might, therefore, never have been encoded.
312  * This gives the caller the opportunity to lookahead early enough
313  * beyond a terminating SDNORM/SDRST for a trailing NEWLEN (as
314  * required by T.85) before decoding remaining symbols. Call the
315  * decoder again afterwards as often as necessary (leaving s->pscd_ptr
316  * pointing to the start of the marker segment) to retrieve any
317  * required remaining symbols that might depend on padding.
318  *
319  * [Note that each PSCD can be decoded into an infinitely long
320  * sequence of symbols, because the encoder might have truncated away
321  * an arbitrarily long sequence of trailing 0x00 bytes, which the
322  * decoder will append automatically as needed when it reaches the end
323  * of the PSCD. Therefore, the decoder cannot report any end of the
324  * symbol sequence and other means (external to the PSCD and
325  * arithmetic decoding process) are needed to determine that.]
326  */
327 
arith_decode(struct jbg_ardec_state * s,int cx)328 int arith_decode(struct jbg_ardec_state *s, int cx)
329 {
330   register unsigned lsz, ss;
331   register unsigned char *st;
332   int pix;
333 
334   /* renormalization */
335   while (s->a < 0x8000 || s->startup) {
336     while (s->ct <= 8 && s->ct >= 0) {
337       /* first we can move a new byte into s->c */
338       if (s->pscd_ptr >= s->pscd_end) {
339 	return -1;  /* more bytes needed */
340       }
341       if (*s->pscd_ptr == 0xff)
342 	if (s->pscd_ptr + 1 >= s->pscd_end) {
343 	  return -1; /* final 0xff byte not processed */
344 	} else {
345 	  if (*(s->pscd_ptr + 1) == MARKER_STUFF) {
346 	    s->c |= 0xffL << (8 - s->ct);
347 	    s->ct += 8;
348 	    s->pscd_ptr += 2;
349 	  } else {
350 	    s->ct = -1; /* start padding with zero bytes */
351 	    if (s->nopadding) {
352 	      s->nopadding = 0;
353 	      return -2; /* subsequent symbols might depend on zero padding */
354 	    }
355 	  }
356 	}
357       else {
358 	s->c |= (long)*(s->pscd_ptr++) << (8 - s->ct);
359 	s->ct += 8;
360       }
361     }
362     s->c <<= 1;
363     s->a <<= 1;
364     if (s->ct >= 0) s->ct--;
365     if (s->a == 0x10000L)
366       s->startup = 0;
367   }
368 
369   st = s->st + cx;
370   ss = *st & 0x7f;
371   assert(ss < 113);
372   lsz = lsztab[ss];
373 
374 #if 0
375   fprintf(stderr, "cx = %d, mps = %d, st = %3d, lsz = 0x%04x, a = 0x%05lx, "
376 	  "c = 0x%08lx, ct = %2d\n",
377 	  cx, !!(s->st[cx] & 0x80), ss, lsz, s->a, s->c, s->ct);
378 #endif
379 
380   if ((s->c >> 16) < (s->a -= lsz))
381     if (s->a & 0xffff8000L)
382       return *st >> 7;
383     else {
384       /* MPS_EXCHANGE */
385       if (s->a < lsz) {
386 	pix = 1 - (*st >> 7);
387 	/* Check whether MPS/LPS exchange is necessary
388 	 * and chose next probability estimator status */
389 	*st &= 0x80;
390 	*st ^= nlpstab[ss];
391       } else {
392 	pix = *st >> 7;
393 	*st &= 0x80;
394 	*st |= nmpstab[ss];
395       }
396     }
397   else {
398     /* LPS_EXCHANGE */
399     if (s->a < lsz) {
400       s->c -= s->a << 16;
401       s->a = lsz;
402       pix = *st >> 7;
403       *st &= 0x80;
404       *st |= nmpstab[ss];
405     } else {
406       s->c -= s->a << 16;
407       s->a = lsz;
408       pix = 1 - (*st >> 7);
409       /* Check whether MPS/LPS exchange is necessary
410        * and chose next probability estimator status */
411       *st &= 0x80;
412       *st ^= nlpstab[ss];
413     }
414   }
415 
416   return pix;
417 }
418