1 /* Copyright (C) 2001-2012 Artifex Software, Inc.
2 All Rights Reserved.
3
4 This software is provided AS-IS with no warranty, either express or
5 implied.
6
7 This software is distributed under license and may not be copied,
8 modified or distributed except as expressly authorized under the terms
9 of the license contained in the file LICENSE in this distribution.
10
11 Refer to licensing information at http://www.artifex.com or contact
12 Artifex Software, Inc., 7 Mt. Lassen Drive - Suite A-134, San Rafael,
13 CA 94903, U.S.A., +1(415)492-9861, for further information.
14 */
15
16
17 /* Pixel differencing filters */
18 #include "stdio_.h" /* should be std.h, but needs NULL */
19 #include "memory_.h"
20 #include "strimpl.h"
21 #include "spdiffx.h"
22
23 /* ------ PixelDifferenceEncode/Decode ------ */
24
25 private_st_PDiff_state();
26
27 /* Define values for case dispatch. */
28 #define cBits1 0
29 #define cBits2 5
30 #define cBits4 10
31 #define cBits8 15
32 #define cBits16 20
33 #define cEncode 0
34 #define cDecode 25
35
36 /* Set defaults */
37 static void
s_PDiff_set_defaults(stream_state * st)38 s_PDiff_set_defaults(stream_state * st)
39 {
40 stream_PDiff_state *const ss = (stream_PDiff_state *) st;
41
42 s_PDiff_set_defaults_inline(ss);
43 }
44
45 /* Common (re)initialization. */
46 static int
s_PDiff_reinit(stream_state * st)47 s_PDiff_reinit(stream_state * st)
48 {
49 stream_PDiff_state *const ss = (stream_PDiff_state *) st;
50
51 ss->row_left = 0;
52 return 0;
53 }
54
55 /* Initialize PixelDifferenceEncode filter. */
56 static int
s_PDiffE_init(stream_state * st)57 s_PDiffE_init(stream_state * st)
58 {
59 stream_PDiff_state *const ss = (stream_PDiff_state *) st;
60 int bits_per_row =
61 ss->Colors * ss->BitsPerComponent * ss->Columns;
62 static const byte cb_values[] = {
63 0, cBits1, cBits2, 0, cBits4, 0, 0, 0, cBits8,
64 0, 0, 0, 0, 0, 0, 0, cBits16
65 };
66
67 ss->row_count = (bits_per_row + 7) >> 3;
68 ss->end_mask = (1 << (-bits_per_row & 7)) - 1;
69 ss->case_index =
70 cb_values[ss->BitsPerComponent] +
71 (ss->Colors > 4 ? 0 : ss->Colors) + cEncode;
72 return s_PDiff_reinit(st);
73 }
74
75 /* Initialize PixelDifferenceDecode filter. */
76 static int
s_PDiffD_init(stream_state * st)77 s_PDiffD_init(stream_state * st)
78 {
79 stream_PDiff_state *const ss = (stream_PDiff_state *) st;
80
81 s_PDiffE_init(st);
82 ss->case_index += cDecode - cEncode;
83 return 0;
84 }
85
86 /* Process a buffer. Note that this handles both Encode and Decode. */
87 static int
s_PDiff_process(stream_state * st,stream_cursor_read * pr,stream_cursor_write * pw,bool last)88 s_PDiff_process(stream_state * st, stream_cursor_read * pr,
89 stream_cursor_write * pw, bool last)
90 {
91 stream_PDiff_state *const ss = (stream_PDiff_state *) st;
92 const byte *p = pr->ptr;
93 byte *q = pw->ptr;
94 int count;
95 int status = 0;
96 uint s0 = ss->prev[0];
97 byte t = 0; /* avoid spurious compiler warnings */
98 int ti;
99 const byte end_mask = ss->end_mask;
100 int colors = ss->Colors;
101 int nb = (colors * ss->BitsPerComponent) >> 3;
102 int final;
103 int ndone, ci;
104
105 row:
106 if (ss->row_left == 0) {
107 ss->row_left = ss->row_count;
108 s0 = 0;
109 memset(&ss->prev[1], 0, sizeof(uint) * (s_PDiff_max_Colors - 1));
110 }
111 {
112 int rcount = pr->limit - p;
113 int wcount = pw->limit - q;
114
115 if (ss->row_left < rcount)
116 rcount = ss->row_left;
117 count = (wcount < rcount ? (status = 1, wcount) : rcount);
118 }
119 final = (last && !status ? 1 : nb);
120 ss->row_left -= count;
121
122 /*
123 * Encoding and decoding are fundamentally different.
124 * Encoding computes E[i] = D[i] - D[i-1];
125 * decoding computes D[i] = E[i] + D[i-1].
126 * Nevertheless, the loop structures are similar enough that
127 * we put the code for both functions in the same place.
128 *
129 * We only optimize Colors = 1, 3, and 4, which correspond to the common
130 * color spaces. (In some cases, it's still simpler to provide a
131 * separate loop for Colors = 2.)
132 */
133
134 #define LOOP_BY(n, body)\
135 for (; count >= n; count -= n) p += n, q += n, body
136
137 switch (ss->case_index) {
138
139 /* 1 bit per component */
140
141 #define ENCODE1_LOOP(ee)\
142 LOOP_BY(1, (t = *p, *q = ee, s0 = t)); break
143
144 #define ENCODE_ALIGNED_LOOP(ee)\
145 BEGIN\
146 ss->prev[0] = s0;\
147 for (; count >= final; count -= ndone) {\
148 ndone = min(count, nb);\
149 for (ci = 0; ci < ndone; ++ci)\
150 t = *++p, *++q = ee, ss->prev[ci] = t;\
151 }\
152 s0 = ss->prev[0];\
153 END
154
155 #define ENCODE_UNALIGNED_LOOP(shift, cshift, de)\
156 BEGIN\
157 for (; count >= final; count -= ndone) {\
158 ndone = min(count, nb);\
159 for (ci = 1; ci <= ndone; ++ci) {\
160 ++p;\
161 t = (s0 << (cshift)) | (ss->prev[ci] >> (shift));\
162 *++q = de;\
163 s0 = ss->prev[ci];\
164 ss->prev[ci] = *p;\
165 }\
166 }\
167 END
168
169 case cEncode + cBits1 + 0:
170 case cEncode + cBits1 + 2:
171 if (colors < 8) { /* 2,5,6,7 */
172 int cshift = 8 - colors;
173
174 ENCODE1_LOOP(t ^ ((s0 << cshift) | (t >> colors)));
175 } else if (colors & 7) {
176 int shift = colors & 7;
177 int cshift = 8 - shift;
178
179 ENCODE_UNALIGNED_LOOP(shift, cshift, *p ^ t);
180 } else {
181 ENCODE_ALIGNED_LOOP(t ^ ss->prev[ci]);
182 }
183 break;
184
185 case cEncode + cBits1 + 1:
186 ENCODE1_LOOP(t ^ ((s0 << 7) | (t >> 1)));
187 case cEncode + cBits1 + 3:
188 ENCODE1_LOOP(t ^ ((s0 << 5) | (t >> 3)));
189 case cEncode + cBits1 + 4:
190 ENCODE1_LOOP(t ^ ((s0 << 4) | (t >> 4)));
191
192 #define DECODE1_LOOP(te, de)\
193 LOOP_BY(1, (t = te, s0 = *q = de)); break
194
195 #define DECODE_ALIGNED_LOOP(de)\
196 BEGIN\
197 ss->prev[0] = s0;\
198 for (; count >= final; count -= ndone) {\
199 ndone = min(count, nb);\
200 for (ci = 0; ci < ndone; ++ci)\
201 t = *++p, ss->prev[ci] = *++q = de;\
202 }\
203 s0 = ss->prev[0];\
204 END
205
206 #define DECODE_UNALIGNED_LOOP(shift, cshift, de)\
207 BEGIN\
208 for (; count >= final; count -= ndone) {\
209 ndone = min(count, nb);\
210 for (ci = 1; ci <= ndone; ++ci) {\
211 ++p, ++q;\
212 t = (s0 << (cshift)) | (ss->prev[ci] >> (shift));\
213 s0 = ss->prev[ci];\
214 ss->prev[ci] = *q = de;\
215 }\
216 }\
217 END
218
219 case cDecode + cBits1 + 0:
220 if (colors < 8) { /* 5,6,7 */
221 int cshift = 8 - colors;
222
223 DECODE1_LOOP(*p ^ (s0 << cshift), t ^ (t >> colors));
224 } else if (colors & 7) {
225 int shift = colors & 7;
226 int cshift = 8 - shift;
227
228 DECODE_UNALIGNED_LOOP(shift, cshift, *p ^ t);
229 } else {
230 DECODE_ALIGNED_LOOP(t ^ ss->prev[ci]);
231 }
232 break;
233
234 case cDecode + cBits1 + 1:
235 DECODE1_LOOP(*p ^ (s0 << 7),
236 (t ^= t >> 1, t ^= t >> 2, t ^ (t >> 4)));
237 case cDecode + cBits1 + 2:
238 DECODE1_LOOP(*p ^ (s0 << 6),
239 (t ^= (t >> 2), t ^ (t >> 4)));
240 case cDecode + cBits1 + 3:
241 DECODE1_LOOP(*p ^ (s0 << 5),
242 t ^ (t >> 3) ^ (t >> 6));
243 case cDecode + cBits1 + 4:
244 DECODE1_LOOP(*p ^ (s0 << 4),
245 t ^ (t >> 4));
246
247 /* 2 bits per component */
248
249 #define ADD4X2(a, b) ( (((a) & (b) & 0x55) << 1) ^ (a) ^ (b) )
250 /* The following computation looks very implausible, but it is correct. */
251 #define SUB4X2(a, b) ( ((~(a) & (b) & 0x55) << 1) ^ (a) ^ (b) )
252
253 case cEncode + cBits2 + 0:
254 if (colors & 7) {
255 int shift = (colors & 3) << 1;
256 int cshift = 8 - shift;
257
258 ENCODE_UNALIGNED_LOOP(shift, cshift, SUB4X2(*p, t));
259 } else {
260 ENCODE_ALIGNED_LOOP(SUB4X2(t, ss->prev[ci]));
261 }
262 break;
263
264 case cEncode + cBits2 + 1:
265 ENCODE1_LOOP((s0 = (s0 << 6) | (t >> 2), SUB4X2(t, s0)));
266 case cEncode + cBits2 + 2:
267 ENCODE1_LOOP((s0 = (s0 << 4) | (t >> 4), SUB4X2(t, s0)));
268 case cEncode + cBits2 + 3:
269 ENCODE1_LOOP((s0 = (s0 << 2) | (t >> 6), SUB4X2(t, s0)));
270 case cEncode + cBits2 + 4:
271 ENCODE1_LOOP(SUB4X2(t, s0));
272
273 case cDecode + cBits2 + 0:
274 if (colors & 7) {
275 int shift = (colors & 3) << 1;
276 int cshift = 8 - shift;
277
278 DECODE_UNALIGNED_LOOP(shift, cshift, ADD4X2(*p, t));
279 } else {
280 DECODE_ALIGNED_LOOP(ADD4X2(t, ss->prev[ci]));
281 }
282 break;
283
284 case cDecode + cBits2 + 1:
285 DECODE1_LOOP(*p + (s0 << 6),
286 (t = ADD4X2(t >> 2, t), ADD4X2(t >> 4, t)));
287 case cDecode + cBits2 + 2:
288 DECODE1_LOOP(*p, (t = ADD4X2(t, s0 << 4), ADD4X2(t >> 4, t)));
289 case cDecode + cBits2 + 3:
290 DECODE1_LOOP(*p, (t = ADD4X2(t, s0 << 2), ADD4X2(t >> 6, t)));
291 case cDecode + cBits2 + 4:
292 DECODE1_LOOP(*p, ADD4X2(t, s0));
293
294 #undef ADD4X2
295 #undef SUB4X2
296
297 /* 4 bits per component */
298
299 #define ADD2X4(a, b) ( (((a) + (b)) & 0xf) + ((a) & 0xf0) + ((b) & 0xf0) )
300 #define ADD2X4R4(a) ( (((a) + ((a) >> 4)) & 0xf) + ((a) & 0xf0) )
301 #define SUB2X4(a, b) ( (((a) - (b)) & 0xf) + ((a) & 0xf0) - ((b) & 0xf0) )
302 #define SUB2X4R4(a) ( (((a) - ((a) >> 4)) & 0xf) + ((a) & 0xf0) )
303
304 case cEncode + cBits4 + 0:
305 case cEncode + cBits4 + 2:
306 enc4:
307 if (colors & 1) {
308 ENCODE_UNALIGNED_LOOP(4, 4, SUB2X4(*p, t));
309 } else {
310 ENCODE_ALIGNED_LOOP(SUB2X4(t, ss->prev[ci]));
311 }
312 break;
313
314 case cEncode + cBits4 + 1:
315 ENCODE1_LOOP(((t - (s0 << 4)) & 0xf0) | ((t - (t >> 4)) & 0xf));
316
317 case cEncode + cBits4 + 3: {
318 uint s1 = ss->prev[1];
319
320 LOOP_BY(1,
321 (t = *p,
322 *q = ((t - (s0 << 4)) & 0xf0) | ((t - (s1 >> 4)) & 0xf),
323 s0 = s1, s1 = t));
324 ss->prev[1] = s1;
325 } break;
326
327 case cEncode + cBits4 + 4: {
328 uint s1 = ss->prev[1];
329
330 LOOP_BY(2,
331 (t = p[-1], q[-1] = SUB2X4(t, s0), s0 = t,
332 t = *p, *q = SUB2X4(t, s1), s1 = t));
333 ss->prev[1] = s1;
334 goto enc4; /* handle leftover bytes */
335 }
336
337 case cDecode + cBits4 + 0:
338 case cDecode + cBits4 + 2:
339 dec4:
340 if (colors & 1) {
341 DECODE_UNALIGNED_LOOP(4, 4, ADD2X4(*p, t));
342 } else {
343 DECODE_ALIGNED_LOOP(ADD2X4(t, ss->prev[ci]));
344 }
345 break;
346
347 case cDecode + cBits4 + 1:
348 DECODE1_LOOP(*p + (s0 << 4), ADD2X4R4(t));
349
350 case cDecode + cBits4 + 3: {
351 uint s1 = ss->prev[1];
352
353 LOOP_BY(1, (t = (s0 << 4) + (s1 >> 4),
354 s0 = s1, s1 = *q = ADD2X4(*p, t)));
355 ss->prev[1] = s1;
356 } break;
357
358 case cDecode + cBits4 + 4: {
359 uint s1 = ss->prev[1];
360
361 LOOP_BY(2,
362 (t = p[-1], s0 = q[-1] = ADD2X4(s0, t),
363 t = *p, s1 = *q = ADD2X4(s1, t)));
364 ss->prev[1] = s1;
365 goto dec4; /* handle leftover bytes */
366 }
367
368 #undef ADD2X4
369 #undef ADD2X4R4
370 #undef SUB2X4
371 #undef SUB2X4R4
372
373 /* 8 bits per component */
374
375 #define ENCODE8(s, d) (q[d] = p[d] - s, s = p[d])
376 #define DECODE8(s, d) q[d] = s += p[d]
377
378 case cEncode + cBits8 + 0:
379 case cEncode + cBits8 + 2:
380 ss->prev[0] = s0;
381 for (; count >= colors; count -= colors)
382 for (ci = 0; ci < colors; ++ci) {
383 *++q = *++p - ss->prev[ci];
384 ss->prev[ci] = *p;
385 }
386 s0 = ss->prev[0];
387 enc8: /* Handle leftover bytes. */
388 if (last && !status)
389 for (ci = 0; ci < count; ++ci)
390 *++q = *++p - ss->prev[ci],
391 ss->prev[ci] = *p;
392 break;
393
394 case cDecode + cBits8 + 0:
395 case cDecode + cBits8 + 2:
396 ss->prev[0] = s0;
397 for (; count >= colors; count -= colors)
398 for (ci = 0; ci < colors; ++ci)
399 *++q = ss->prev[ci] += *++p;
400 s0 = ss->prev[0];
401 dec8: /* Handle leftover bytes. */
402 if (last && !status)
403 for (ci = 0; ci < count; ++ci)
404 *++q = ss->prev[ci] += *++p;
405 break;
406
407 case cEncode + cBits8 + 1:
408 LOOP_BY(1, ENCODE8(s0, 0));
409 break;
410
411 case cDecode + cBits8 + 1:
412 LOOP_BY(1, DECODE8(s0, 0));
413 break;
414
415 case cEncode + cBits8 + 3: {
416 uint s1 = ss->prev[1], s2 = ss->prev[2];
417
418 LOOP_BY(3, (ENCODE8(s0, -2), ENCODE8(s1, -1),
419 ENCODE8(s2, 0)));
420 ss->prev[0] = s0, ss->prev[1] = s1, ss->prev[2] = s2;
421 goto enc8;
422 }
423
424 case cDecode + cBits8 + 3: {
425 uint s1 = ss->prev[1], s2 = ss->prev[2];
426
427 LOOP_BY(3, (DECODE8(s0, -2), DECODE8(s1, -1),
428 DECODE8(s2, 0)));
429 ss->prev[0] = s0, ss->prev[1] = s1, ss->prev[2] = s2;
430 goto dec8;
431 } break;
432
433 case cEncode + cBits8 + 4: {
434 uint s1 = ss->prev[1], s2 = ss->prev[2], s3 = ss->prev[3];
435
436 LOOP_BY(4, (ENCODE8(s0, -3), ENCODE8(s1, -2),
437 ENCODE8(s2, -1), ENCODE8(s3, 0)));
438 ss->prev[0] = s0, ss->prev[1] = s1, ss->prev[2] = s2, ss->prev[3] = s3;
439 goto enc8;
440 } break;
441
442 case cDecode + cBits8 + 4: {
443 uint s1 = ss->prev[1], s2 = ss->prev[2], s3 = ss->prev[3];
444
445 LOOP_BY(4, (DECODE8(s0, -3), DECODE8(s1, -2),
446 DECODE8(s2, -1), DECODE8(s3, 0)));
447 ss->prev[0] = s0, ss->prev[1] = s1, ss->prev[2] = s2, ss->prev[3] = s3;
448 goto dec8;
449 } break;
450
451 #undef ENCODE8
452 #undef DECODE8
453
454 /* 16 bits per component */
455
456 #define ENCODE16(s, d) (ti = ((p[d-1] << 8) + p[d]), s = ti - s,\
457 q[d-1] = s >> 8, q[d] = s & 0xff, s = ti)
458 #define DECODE16(s, d) (s = 0xffff & (s + ((p[d-1] << 8) + p[d])), \
459 q[d-1] = s >> 8, q[d] = s & 0xff)
460
461 case cEncode + cBits16 + 0:
462 case cEncode + cBits16 + 2:
463 ss->prev[0] = s0;
464 for (; count >= colors*2; count -= colors*2)
465 for (ci = 0; ci < colors; ++ci) {
466 uint k;
467 ti = (int)*++p << 8;
468 ti += (int)*++p;
469 k = ti - ss->prev[ci];
470 *++q = k >> 8;
471 *++q = k & 0xff;
472 ss->prev[ci] = ti;
473 }
474 s0 = ss->prev[0];
475 enc16: /* Handle leftover bytes. */
476 if (last && !status)
477 for (ci = 0; ci < count; ++ci)
478 *++q = *++p - ss->prev[ci],
479 ss->prev[ci] = *p;
480 break;
481
482 case cDecode + cBits16 + 0:
483 case cDecode + cBits16 + 2:
484 ss->prev[0] = s0;
485 for (; count >= colors*2; count -= colors*2)
486 for (ci = 0; ci < colors; ++ci) {
487 ti = (int)*++p << 8;
488 ss->prev[ci] += ti + *++p;
489 *++q = ss->prev[ci] >> 8;
490 *++q = ss->prev[ci] & 0xff;
491 }
492 s0 = ss->prev[0];
493 dec16: /* Ignore leftover bytes. */
494 break;
495
496 case cEncode + cBits16 + 1:
497 LOOP_BY(2, ENCODE16(s0, 0));
498 break;
499
500 case cDecode + cBits16 + 1:
501 LOOP_BY(2, DECODE16(s0, 0));
502 break;
503
504 case cEncode + cBits16 + 3: {
505 uint s1 = ss->prev[1], s2 = ss->prev[2];
506
507 LOOP_BY(6, (ENCODE16(s0, -4), ENCODE16(s1, -2),
508 ENCODE16(s2, 0)));
509 ss->prev[0] = s0, ss->prev[1] = s1, ss->prev[2] = s2;
510 goto enc16;
511 }
512
513 case cDecode + cBits16 + 3: {
514 uint s1 = ss->prev[1], s2 = ss->prev[2];
515
516 LOOP_BY(6, (DECODE16(s0, -4), DECODE16(s1, -2),
517 DECODE16(s2, 0)));
518 ss->prev[0] = s0, ss->prev[1] = s1, ss->prev[2] = s2;
519 goto dec16;
520 } break;
521
522 case cEncode + cBits16 + 4: {
523 uint s1 = ss->prev[1], s2 = ss->prev[2], s3 = ss->prev[3];
524
525 LOOP_BY(8, (ENCODE16(s0, -6), ENCODE16(s1, -4),
526 ENCODE16(s2, -2), ENCODE16(s3, 0)));
527 ss->prev[0] = s0, ss->prev[1] = s1, ss->prev[2] = s2, ss->prev[3] = s3;
528 goto enc16;
529 } break;
530
531 case cDecode + cBits16 + 4: {
532 uint s1 = ss->prev[1], s2 = ss->prev[2], s3 = ss->prev[3];
533
534 LOOP_BY(8, (DECODE16(s0, -6), DECODE16(s1, -4),
535 DECODE16(s2, -2), DECODE16(s3, 0)));
536 ss->prev[0] = s0, ss->prev[1] = s1, ss->prev[2] = s2, ss->prev[3] = s3;
537 goto dec16;
538 } break;
539
540 #undef ENCODE16
541 #undef DECODE16
542
543 }
544 #undef LOOP_BY
545 #undef ENCODE1_LOOP
546 #undef DECODE1_LOOP
547 ss->row_left += count; /* leftover bytes are possible */
548 if (ss->row_left == 0) {
549 if (end_mask != 0)
550 *q = (*q & ~end_mask) | (*p & end_mask);
551 if (p < pr->limit && q < pw->limit)
552 goto row;
553 }
554 ss->prev[0] = s0;
555 pr->ptr = p;
556 pw->ptr = q;
557 return status;
558 }
559
560 /* Stream templates */
561 const stream_template s_PDiffE_template = {
562 &st_PDiff_state, s_PDiffE_init, s_PDiff_process, 1, 1, NULL,
563 s_PDiff_set_defaults, s_PDiff_reinit
564 };
565 const stream_template s_PDiffD_template = {
566 &st_PDiff_state, s_PDiffD_init, s_PDiff_process, 1, 1, NULL,
567 s_PDiff_set_defaults, s_PDiff_reinit
568 };
569