1 /* Copyright (C) 2001-2012 Artifex Software, Inc.
2    All Rights Reserved.
3 
4    This software is provided AS-IS with no warranty, either express or
5    implied.
6 
7    This software is distributed under license and may not be copied,
8    modified or distributed except as expressly authorized under the terms
9    of the license contained in the file LICENSE in this distribution.
10 
11    Refer to licensing information at http://www.artifex.com or contact
12    Artifex Software, Inc.,  7 Mt. Lassen Drive - Suite A-134, San Rafael,
13    CA  94903, U.S.A., +1(415)492-9861, for further information.
14 */
15 
16 
17 /* Pixel differencing filters */
18 #include "stdio_.h"		/* should be std.h, but needs NULL */
19 #include "memory_.h"
20 #include "strimpl.h"
21 #include "spdiffx.h"
22 
23 /* ------ PixelDifferenceEncode/Decode ------ */
24 
25 private_st_PDiff_state();
26 
27 /* Define values for case dispatch. */
28 #define cBits1 0
29 #define cBits2 5
30 #define cBits4 10
31 #define cBits8 15
32 #define cBits16 20
33 #define cEncode 0
34 #define cDecode 25
35 
36 /* Set defaults */
37 static void
s_PDiff_set_defaults(stream_state * st)38 s_PDiff_set_defaults(stream_state * st)
39 {
40     stream_PDiff_state *const ss = (stream_PDiff_state *) st;
41 
42     s_PDiff_set_defaults_inline(ss);
43 }
44 
45 /* Common (re)initialization. */
46 static int
s_PDiff_reinit(stream_state * st)47 s_PDiff_reinit(stream_state * st)
48 {
49     stream_PDiff_state *const ss = (stream_PDiff_state *) st;
50 
51     ss->row_left = 0;
52     return 0;
53 }
54 
55 /* Initialize PixelDifferenceEncode filter. */
56 static int
s_PDiffE_init(stream_state * st)57 s_PDiffE_init(stream_state * st)
58 {
59     stream_PDiff_state *const ss = (stream_PDiff_state *) st;
60     int bits_per_row =
61         ss->Colors * ss->BitsPerComponent * ss->Columns;
62     static const byte cb_values[] = {
63         0, cBits1, cBits2, 0, cBits4, 0, 0, 0, cBits8,
64         0, 0, 0, 0, 0, 0, 0, cBits16
65     };
66 
67     ss->row_count = (bits_per_row + 7) >> 3;
68     ss->end_mask = (1 << (-bits_per_row & 7)) - 1;
69     ss->case_index =
70         cb_values[ss->BitsPerComponent] +
71         (ss->Colors > 4 ? 0 : ss->Colors) + cEncode;
72     return s_PDiff_reinit(st);
73 }
74 
75 /* Initialize PixelDifferenceDecode filter. */
76 static int
s_PDiffD_init(stream_state * st)77 s_PDiffD_init(stream_state * st)
78 {
79     stream_PDiff_state *const ss = (stream_PDiff_state *) st;
80 
81     s_PDiffE_init(st);
82     ss->case_index += cDecode - cEncode;
83     return 0;
84 }
85 
86 /* Process a buffer.  Note that this handles both Encode and Decode. */
87 static int
s_PDiff_process(stream_state * st,stream_cursor_read * pr,stream_cursor_write * pw,bool last)88 s_PDiff_process(stream_state * st, stream_cursor_read * pr,
89                 stream_cursor_write * pw, bool last)
90 {
91     stream_PDiff_state *const ss = (stream_PDiff_state *) st;
92     const byte *p = pr->ptr;
93     byte *q = pw->ptr;
94     int count;
95     int status = 0;
96     uint s0 = ss->prev[0];
97     byte t = 0;			/* avoid spurious compiler warnings */
98     int  ti;
99     const byte end_mask = ss->end_mask;
100     int colors = ss->Colors;
101     int nb = (colors * ss->BitsPerComponent) >> 3;
102     int final;
103     int ndone, ci;
104 
105 row:
106     if (ss->row_left == 0) {
107         ss->row_left = ss->row_count;
108         s0 = 0;
109         memset(&ss->prev[1], 0, sizeof(uint) * (s_PDiff_max_Colors - 1));
110     }
111     {
112         int rcount = pr->limit - p;
113         int wcount = pw->limit - q;
114 
115         if (ss->row_left < rcount)
116             rcount = ss->row_left;
117         count = (wcount < rcount ? (status = 1, wcount) : rcount);
118     }
119     final = (last && !status ? 1 : nb);
120     ss->row_left -= count;
121 
122     /*
123      * Encoding and decoding are fundamentally different.
124      * Encoding computes E[i] = D[i] - D[i-1];
125      * decoding computes D[i] = E[i] + D[i-1].
126      * Nevertheless, the loop structures are similar enough that
127      * we put the code for both functions in the same place.
128      *
129      * We only optimize Colors = 1, 3, and 4, which correspond to the common
130      * color spaces.  (In some cases, it's still simpler to provide a
131      * separate loop for Colors = 2.)
132      */
133 
134 #define LOOP_BY(n, body)\
135   for (; count >= n; count -= n) p += n, q += n, body
136 
137     switch (ss->case_index) {
138 
139             /* 1 bit per component */
140 
141 #define ENCODE1_LOOP(ee)\
142   LOOP_BY(1, (t = *p, *q = ee, s0 = t)); break
143 
144 #define ENCODE_ALIGNED_LOOP(ee)\
145   BEGIN\
146     ss->prev[0] = s0;\
147     for (; count >= final; count -= ndone) {\
148         ndone = min(count, nb);\
149         for (ci = 0; ci < ndone; ++ci)\
150             t = *++p, *++q = ee, ss->prev[ci] = t;\
151     }\
152     s0 = ss->prev[0];\
153   END
154 
155 #define ENCODE_UNALIGNED_LOOP(shift, cshift, de)\
156   BEGIN\
157     for (; count >= final; count -= ndone) {\
158         ndone = min(count, nb);\
159         for (ci = 1; ci <= ndone; ++ci) {\
160             ++p;\
161             t = (s0 << (cshift)) | (ss->prev[ci] >> (shift));\
162             *++q = de;\
163             s0 = ss->prev[ci];\
164             ss->prev[ci] = *p;\
165         }\
166     }\
167   END
168 
169         case cEncode + cBits1 + 0:
170         case cEncode + cBits1 + 2:
171             if (colors < 8) {	/* 2,5,6,7 */
172                 int cshift = 8 - colors;
173 
174                 ENCODE1_LOOP(t ^ ((s0 << cshift) | (t >> colors)));
175             } else if (colors & 7) {
176                 int shift = colors & 7;
177                 int cshift = 8 - shift;
178 
179                 ENCODE_UNALIGNED_LOOP(shift, cshift, *p ^ t);
180             } else {
181                 ENCODE_ALIGNED_LOOP(t ^ ss->prev[ci]);
182             }
183             break;
184 
185         case cEncode + cBits1 + 1:
186             ENCODE1_LOOP(t ^ ((s0 << 7) | (t >> 1)));
187         case cEncode + cBits1 + 3:
188             ENCODE1_LOOP(t ^ ((s0 << 5) | (t >> 3)));
189         case cEncode + cBits1 + 4:
190             ENCODE1_LOOP(t ^ ((s0 << 4) | (t >> 4)));
191 
192 #define DECODE1_LOOP(te, de)\
193   LOOP_BY(1, (t = te, s0 = *q = de)); break
194 
195 #define DECODE_ALIGNED_LOOP(de)\
196   BEGIN\
197     ss->prev[0] = s0;\
198     for (; count >= final; count -= ndone) {\
199         ndone = min(count, nb);\
200         for (ci = 0; ci < ndone; ++ci)\
201             t = *++p, ss->prev[ci] = *++q = de;\
202     }\
203     s0 = ss->prev[0];\
204   END
205 
206 #define DECODE_UNALIGNED_LOOP(shift, cshift, de)\
207   BEGIN\
208     for (; count >= final; count -= ndone) {\
209         ndone = min(count, nb);\
210         for (ci = 1; ci <= ndone; ++ci) {\
211             ++p, ++q;\
212             t = (s0 << (cshift)) | (ss->prev[ci] >> (shift));\
213             s0 = ss->prev[ci];\
214             ss->prev[ci] = *q = de;\
215         }\
216     }\
217   END
218 
219         case cDecode + cBits1 + 0:
220             if (colors < 8) {	/* 5,6,7 */
221                 int cshift = 8 - colors;
222 
223                 DECODE1_LOOP(*p ^ (s0 << cshift), t ^ (t >> colors));
224             } else if (colors & 7) {
225                 int shift = colors & 7;
226                 int cshift = 8 - shift;
227 
228                 DECODE_UNALIGNED_LOOP(shift, cshift, *p ^ t);
229             } else {
230                 DECODE_ALIGNED_LOOP(t ^ ss->prev[ci]);
231             }
232             break;
233 
234         case cDecode + cBits1 + 1:
235             DECODE1_LOOP(*p ^ (s0 << 7),
236                          (t ^= t >> 1, t ^= t >> 2, t ^ (t >> 4)));
237         case cDecode + cBits1 + 2:
238             DECODE1_LOOP(*p ^ (s0 << 6),
239                          (t ^= (t >> 2), t ^ (t >> 4)));
240         case cDecode + cBits1 + 3:
241             DECODE1_LOOP(*p ^ (s0 << 5),
242                          t ^ (t >> 3) ^ (t >> 6));
243         case cDecode + cBits1 + 4:
244             DECODE1_LOOP(*p ^ (s0 << 4),
245                          t ^ (t >> 4));
246 
247             /* 2 bits per component */
248 
249 #define ADD4X2(a, b) ( (((a) & (b) & 0x55) << 1) ^ (a) ^ (b) )
250 /* The following computation looks very implausible, but it is correct. */
251 #define SUB4X2(a, b) ( ((~(a) & (b) & 0x55) << 1) ^ (a) ^ (b) )
252 
253         case cEncode + cBits2 + 0:
254             if (colors & 7) {
255                 int shift = (colors & 3) << 1;
256                 int cshift = 8 - shift;
257 
258                 ENCODE_UNALIGNED_LOOP(shift, cshift, SUB4X2(*p, t));
259             } else {
260                 ENCODE_ALIGNED_LOOP(SUB4X2(t, ss->prev[ci]));
261             }
262             break;
263 
264         case cEncode + cBits2 + 1:
265             ENCODE1_LOOP((s0 = (s0 << 6) | (t >> 2), SUB4X2(t, s0)));
266         case cEncode + cBits2 + 2:
267             ENCODE1_LOOP((s0 = (s0 << 4) | (t >> 4), SUB4X2(t, s0)));
268         case cEncode + cBits2 + 3:
269             ENCODE1_LOOP((s0 = (s0 << 2) | (t >> 6), SUB4X2(t, s0)));
270         case cEncode + cBits2 + 4:
271             ENCODE1_LOOP(SUB4X2(t, s0));
272 
273         case cDecode + cBits2 + 0:
274             if (colors & 7) {
275                 int shift = (colors & 3) << 1;
276                 int cshift = 8 - shift;
277 
278                 DECODE_UNALIGNED_LOOP(shift, cshift, ADD4X2(*p, t));
279             } else {
280                 DECODE_ALIGNED_LOOP(ADD4X2(t, ss->prev[ci]));
281             }
282             break;
283 
284         case cDecode + cBits2 + 1:
285             DECODE1_LOOP(*p + (s0 << 6),
286                          (t = ADD4X2(t >> 2, t), ADD4X2(t >> 4, t)));
287         case cDecode + cBits2 + 2:
288             DECODE1_LOOP(*p, (t = ADD4X2(t, s0 << 4), ADD4X2(t >> 4, t)));
289         case cDecode + cBits2 + 3:
290             DECODE1_LOOP(*p, (t = ADD4X2(t, s0 << 2), ADD4X2(t >> 6, t)));
291         case cDecode + cBits2 + 4:
292             DECODE1_LOOP(*p, ADD4X2(t, s0));
293 
294 #undef ADD4X2
295 #undef SUB4X2
296 
297             /* 4 bits per component */
298 
299 #define ADD2X4(a, b) ( (((a) + (b)) & 0xf) + ((a) & 0xf0) + ((b) & 0xf0) )
300 #define ADD2X4R4(a) ( (((a) + ((a) >> 4)) & 0xf) + ((a) & 0xf0) )
301 #define SUB2X4(a, b) ( (((a) - (b)) & 0xf) + ((a) & 0xf0) - ((b) & 0xf0) )
302 #define SUB2X4R4(a) ( (((a) - ((a) >> 4)) & 0xf) + ((a) & 0xf0) )
303 
304         case cEncode + cBits4 + 0:
305         case cEncode + cBits4 + 2:
306     enc4:
307             if (colors & 1) {
308                 ENCODE_UNALIGNED_LOOP(4, 4, SUB2X4(*p, t));
309             } else {
310                 ENCODE_ALIGNED_LOOP(SUB2X4(t, ss->prev[ci]));
311             }
312             break;
313 
314         case cEncode + cBits4 + 1:
315             ENCODE1_LOOP(((t - (s0 << 4)) & 0xf0) | ((t - (t >> 4)) & 0xf));
316 
317         case cEncode + cBits4 + 3: {
318             uint s1 = ss->prev[1];
319 
320             LOOP_BY(1,
321                     (t = *p,
322                      *q = ((t - (s0 << 4)) & 0xf0) | ((t - (s1 >> 4)) & 0xf),
323                      s0 = s1, s1 = t));
324             ss->prev[1] = s1;
325         } break;
326 
327         case cEncode + cBits4 + 4: {
328             uint s1 = ss->prev[1];
329 
330             LOOP_BY(2,
331                     (t = p[-1], q[-1] = SUB2X4(t, s0), s0 = t,
332                      t = *p, *q = SUB2X4(t, s1), s1 = t));
333             ss->prev[1] = s1;
334             goto enc4;		/* handle leftover bytes */
335         }
336 
337         case cDecode + cBits4 + 0:
338         case cDecode + cBits4 + 2:
339     dec4:
340             if (colors & 1) {
341                 DECODE_UNALIGNED_LOOP(4, 4, ADD2X4(*p, t));
342             } else {
343                 DECODE_ALIGNED_LOOP(ADD2X4(t, ss->prev[ci]));
344             }
345             break;
346 
347         case cDecode + cBits4 + 1:
348             DECODE1_LOOP(*p + (s0 << 4), ADD2X4R4(t));
349 
350         case cDecode + cBits4 + 3: {
351             uint s1 = ss->prev[1];
352 
353             LOOP_BY(1, (t = (s0 << 4) + (s1 >> 4),
354                         s0 = s1, s1 = *q = ADD2X4(*p, t)));
355             ss->prev[1] = s1;
356         } break;
357 
358         case cDecode + cBits4 + 4: {
359             uint s1 = ss->prev[1];
360 
361             LOOP_BY(2,
362                     (t = p[-1], s0 = q[-1] = ADD2X4(s0, t),
363                      t = *p, s1 = *q = ADD2X4(s1, t)));
364             ss->prev[1] = s1;
365             goto dec4;		/* handle leftover bytes */
366         }
367 
368 #undef ADD2X4
369 #undef ADD2X4R4
370 #undef SUB2X4
371 #undef SUB2X4R4
372 
373             /* 8 bits per component */
374 
375 #define ENCODE8(s, d) (q[d] = p[d] - s, s = p[d])
376 #define DECODE8(s, d) q[d] = s += p[d]
377 
378         case cEncode + cBits8 + 0:
379         case cEncode + cBits8 + 2:
380             ss->prev[0] = s0;
381             for (; count >= colors; count -= colors)
382                 for (ci = 0; ci < colors; ++ci) {
383                     *++q = *++p - ss->prev[ci];
384                     ss->prev[ci] = *p;
385                 }
386             s0 = ss->prev[0];
387     enc8:   /* Handle leftover bytes. */
388             if (last && !status)
389                 for (ci = 0; ci < count; ++ci)
390                     *++q = *++p - ss->prev[ci],
391                         ss->prev[ci] = *p;
392             break;
393 
394         case cDecode + cBits8 + 0:
395         case cDecode + cBits8 + 2:
396             ss->prev[0] = s0;
397             for (; count >= colors; count -= colors)
398                 for (ci = 0; ci < colors; ++ci)
399                     *++q = ss->prev[ci] += *++p;
400             s0 = ss->prev[0];
401     dec8:   /* Handle leftover bytes. */
402             if (last && !status)
403                 for (ci = 0; ci < count; ++ci)
404                     *++q = ss->prev[ci] += *++p;
405             break;
406 
407         case cEncode + cBits8 + 1:
408             LOOP_BY(1, ENCODE8(s0, 0));
409             break;
410 
411         case cDecode + cBits8 + 1:
412             LOOP_BY(1, DECODE8(s0, 0));
413             break;
414 
415         case cEncode + cBits8 + 3: {
416             uint s1 = ss->prev[1], s2 = ss->prev[2];
417 
418             LOOP_BY(3, (ENCODE8(s0, -2), ENCODE8(s1, -1),
419                         ENCODE8(s2, 0)));
420             ss->prev[0] = s0, ss->prev[1] = s1, ss->prev[2] = s2;
421             goto enc8;
422         }
423 
424         case cDecode + cBits8 + 3: {
425             uint s1 = ss->prev[1], s2 = ss->prev[2];
426 
427             LOOP_BY(3, (DECODE8(s0, -2), DECODE8(s1, -1),
428                         DECODE8(s2, 0)));
429             ss->prev[0] = s0, ss->prev[1] = s1, ss->prev[2] = s2;
430             goto dec8;
431         } break;
432 
433         case cEncode + cBits8 + 4: {
434             uint s1 = ss->prev[1], s2 = ss->prev[2], s3 = ss->prev[3];
435 
436             LOOP_BY(4, (ENCODE8(s0, -3), ENCODE8(s1, -2),
437                         ENCODE8(s2, -1), ENCODE8(s3, 0)));
438             ss->prev[0] = s0, ss->prev[1] = s1, ss->prev[2] = s2, ss->prev[3] = s3;
439             goto enc8;
440         } break;
441 
442         case cDecode + cBits8 + 4: {
443             uint s1 = ss->prev[1], s2 = ss->prev[2], s3 = ss->prev[3];
444 
445             LOOP_BY(4, (DECODE8(s0, -3), DECODE8(s1, -2),
446                         DECODE8(s2, -1), DECODE8(s3, 0)));
447             ss->prev[0] = s0, ss->prev[1] = s1, ss->prev[2] = s2, ss->prev[3] = s3;
448             goto dec8;
449         } break;
450 
451 #undef ENCODE8
452 #undef DECODE8
453 
454             /* 16 bits per component */
455 
456 #define ENCODE16(s, d) (ti = ((p[d-1] << 8) + p[d]), s = ti - s,\
457             q[d-1] = s >> 8, q[d] = s & 0xff, s = ti)
458 #define DECODE16(s, d) (s = 0xffff & (s + ((p[d-1] << 8) + p[d])), \
459             q[d-1] = s >> 8, q[d] = s & 0xff)
460 
461         case cEncode + cBits16 + 0:
462         case cEncode + cBits16 + 2:
463             ss->prev[0] = s0;
464             for (; count >= colors*2; count -= colors*2)
465                 for (ci = 0; ci < colors; ++ci) {
466                     uint k;
467                     ti =  (int)*++p << 8;
468                     ti += (int)*++p;
469                     k = ti - ss->prev[ci];
470                     *++q = k >> 8;
471                     *++q = k & 0xff;
472                     ss->prev[ci] = ti;
473                 }
474             s0 = ss->prev[0];
475     enc16:   /* Handle leftover bytes. */
476             if (last && !status)
477                 for (ci = 0; ci < count; ++ci)
478                     *++q = *++p - ss->prev[ci],
479                         ss->prev[ci] = *p;
480             break;
481 
482         case cDecode + cBits16 + 0:
483         case cDecode + cBits16 + 2:
484             ss->prev[0] = s0;
485             for (; count >= colors*2; count -= colors*2)
486                 for (ci = 0; ci < colors; ++ci) {
487                     ti = (int)*++p << 8;
488                     ss->prev[ci] += ti + *++p;
489                     *++q = ss->prev[ci] >> 8;
490                     *++q = ss->prev[ci] & 0xff;
491                 }
492             s0 = ss->prev[0];
493     dec16:   /* Ignore leftover bytes. */
494             break;
495 
496         case cEncode + cBits16 + 1:
497            LOOP_BY(2, ENCODE16(s0, 0));
498            break;
499 
500         case cDecode + cBits16 + 1:
501            LOOP_BY(2, DECODE16(s0, 0));
502            break;
503 
504         case cEncode + cBits16 + 3: {
505             uint s1 = ss->prev[1], s2 = ss->prev[2];
506 
507             LOOP_BY(6, (ENCODE16(s0, -4), ENCODE16(s1, -2),
508                         ENCODE16(s2, 0)));
509             ss->prev[0] = s0, ss->prev[1] = s1, ss->prev[2] = s2;
510             goto enc16;
511         }
512 
513         case cDecode + cBits16 + 3: {
514             uint s1 = ss->prev[1], s2 = ss->prev[2];
515 
516             LOOP_BY(6, (DECODE16(s0, -4), DECODE16(s1, -2),
517                         DECODE16(s2, 0)));
518             ss->prev[0] = s0, ss->prev[1] = s1, ss->prev[2] = s2;
519             goto dec16;
520         } break;
521 
522         case cEncode + cBits16 + 4: {
523             uint s1 = ss->prev[1], s2 = ss->prev[2], s3 = ss->prev[3];
524 
525             LOOP_BY(8, (ENCODE16(s0, -6), ENCODE16(s1, -4),
526                         ENCODE16(s2, -2), ENCODE16(s3, 0)));
527             ss->prev[0] = s0, ss->prev[1] = s1, ss->prev[2] = s2, ss->prev[3] = s3;
528             goto enc16;
529         } break;
530 
531         case cDecode + cBits16 + 4: {
532             uint s1 = ss->prev[1], s2 = ss->prev[2], s3 = ss->prev[3];
533 
534             LOOP_BY(8, (DECODE16(s0, -6), DECODE16(s1, -4),
535                         DECODE16(s2, -2), DECODE16(s3, 0)));
536             ss->prev[0] = s0, ss->prev[1] = s1, ss->prev[2] = s2, ss->prev[3] = s3;
537             goto dec16;
538         } break;
539 
540 #undef ENCODE16
541 #undef DECODE16
542 
543     }
544 #undef LOOP_BY
545 #undef ENCODE1_LOOP
546 #undef DECODE1_LOOP
547     ss->row_left += count;	/* leftover bytes are possible */
548     if (ss->row_left == 0) {
549         if (end_mask != 0)
550             *q = (*q & ~end_mask) | (*p & end_mask);
551         if (p < pr->limit && q < pw->limit)
552             goto row;
553     }
554     ss->prev[0] = s0;
555     pr->ptr = p;
556     pw->ptr = q;
557     return status;
558 }
559 
560 /* Stream templates */
561 const stream_template s_PDiffE_template = {
562     &st_PDiff_state, s_PDiffE_init, s_PDiff_process, 1, 1, NULL,
563     s_PDiff_set_defaults, s_PDiff_reinit
564 };
565 const stream_template s_PDiffD_template = {
566     &st_PDiff_state, s_PDiffD_init, s_PDiff_process, 1, 1, NULL,
567     s_PDiff_set_defaults, s_PDiff_reinit
568 };
569