1 /* Copyright (C) 2001-2019 Artifex Software, Inc.
2 All Rights Reserved.
3
4 This software is provided AS-IS with no warranty, either express or
5 implied.
6
7 This software is distributed under license and may not be copied,
8 modified or distributed except as expressly authorized under the terms
9 of the license contained in the file LICENSE in this distribution.
10
11 Refer to licensing information at http://www.artifex.com or contact
12 Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato,
13 CA 94945, U.S.A., +1(415)492-9861, for further information.
14 */
15
16
17 /* Testbed implementation of Even Better Screening. */
18
19 /*
20 * Code in this module is covered by US Patents 5,055,942 and
21 * 5,917,614, and corresponding international patents.
22 */
23
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <math.h>
28 #include "evenbetter-rll.h"
29
30 /* Set this define if compiling with AltiVec optimizations. */
31 #define noUSE_AVEC
32
33 /* Set this define if compiling with SSE optimizations. */
34 #define noUSE_SSE2
35
36 #define EVENBETTER_VERSION 133
37
38 #define EVEN_SHIFT 16
39 #define IMO_SHIFT 14
40 #define EVEN_RB_CAP (1 << (EVEN_SHIFT - 2))
41
42 #define FANCY_COUPLING
43
44 #if defined(USE_AVEC) || defined(USE_SSE2)
45 #define USE_VECTOR
46 #endif
47
48 #ifdef USE_AVEC
49 #include "eb_avec.h"
50
51 #endif
52
53 #ifdef USE_SSE2
54 typedef struct _eb_ctx_sse2 eb_ctx_sse2;
55 typedef struct _eb_srcbuf eb_srcbuf;
56
57 int eb_test_sse2(void);
58 int eb_sse2_core(eb_ctx_sse2 *ctx, unsigned char **out, eb_srcbuf *in,
59 int offset);
60 int eb_sse2_rev_rs(eb_ctx_sse2 *ctx, int offset);
61 int eb_sse2_set_daz(void);
62 void eb_sse2_restore_daz(int save_mxcsr);
63
64 struct _eb_ctx_sse2 {
65 int xs;
66 int *iir_line;
67 int *r_line;
68 int *a_line;
69 int *b_line;
70 char *skip_line;
71 int dummy[2];
72 float *luts[4];
73 float e[4];
74 float e_i_1[4];
75 int r[4];
76 int a[4];
77 int b[4];
78 int ones[4];
79 int twos[4];
80 int aspect2[4];
81 float ehi[4];
82 float elo[4];
83 float ohi[4];
84 float r_mul[4];
85 float kernel[4];
86 unsigned int seed1[4];
87 unsigned int seed2[4];
88 };
89
90 struct _eb_srcbuf {
91 float im[64];
92 float rb[64];
93 float rs[64];
94 int dummy[3];
95 };
96
97 #endif
98
99 typedef struct _EBPlaneCtx EBPlaneCtx;
100 typedef unsigned int uint32;
101
102 struct _EvenBetterCtx {
103 int source_width;
104 int dest_width;
105 int n_planes;
106 int levels; /* Number of levels on output, <= 256 */
107 EBPlaneCtx **plane_ctx;
108 int aspect;
109 int *strengths;
110 int even_elo;
111 int even_ehi;
112 int *c_line;
113
114 int even_c1;
115 int do_shadows;
116
117 uint32 seed1;
118 uint32 seed2;
119
120 FILE *dump_file;
121 EbDumpLevel dump_level;
122
123 #ifdef USE_SSE2
124 eb_ctx_sse2 **sse2_ctx;
125 int using_vectors;
126 #endif
127 #ifdef USE_AVEC
128 eb_ctx_avec **avec_ctx;
129 int using_vectors;
130 #endif
131 };
132
133 struct _EBPlaneCtx {
134 int source_width;
135 int dest_width;
136 int *rb_line;
137 int *iir_line;
138 int *r_line;
139 int *a_line;
140 int *b_line;
141 int *r_line_sh;
142 int *a_line_sh;
143 int *b_line_sh;
144 int *lut;
145 int *rb_lut;
146 char *rs_lut;
147 int *white_count_line;
148 };
149
150 void *
eb_malloc_aligned(int size,int align)151 eb_malloc_aligned(int size, int align)
152 {
153 void *result;
154 void *alloced = malloc(size + align);
155 int pad;
156
157 if (alloced == 0)
158 return 0;
159 pad = (((int)(size_t)alloced + 12) & 15) + 4;
160 result = (void *)(pad + (char *)alloced);
161 ((int *)result)[-1] = pad;
162 return result;
163 }
164
165 void
eb_free_aligned(void * p)166 eb_free_aligned(void *p)
167 {
168 int pad = ((int *)p)[-1];
169 free((char*)p - pad);
170 }
171
172 static double
eb_compute_rbscale(const EvenBetterParams * params)173 eb_compute_rbscale(const EvenBetterParams *params)
174 {
175 double rbscale = params->rbscale;
176
177 if (rbscale == 0.0)
178 {
179 rbscale = params->aspect == 1 ? 0.95 :
180 params->aspect == 2 ? 1.8 :
181 params->aspect == 4 ? 3.6 : 1;
182 }
183 return rbscale;
184 }
185
186 static int
eb_compute_randshift(int nl,int rs_base,int do_shadows,int levels)187 eb_compute_randshift(int nl, int rs_base, int do_shadows, int levels)
188 {
189 int rs = rs_base;
190 if ((nl > (90 << (EVEN_SHIFT - 10)) &&
191 nl < (129 << (EVEN_SHIFT - 10))) ||
192 (nl > (162 << (EVEN_SHIFT - 10)) &&
193 nl < (180 << (EVEN_SHIFT - 10))))
194 rs--;
195 else if (nl > (321 << (EVEN_SHIFT - 10)) &&
196 nl < (361 << (EVEN_SHIFT - 10)))
197 {
198 rs--;
199 if (nl > (331 << (EVEN_SHIFT - 10)) &&
200 nl < (351 << (EVEN_SHIFT - 10)))
201 rs--;
202 }
203 else if ((do_shadows ||
204 nl == (levels - 1) << EVEN_SHIFT) &&
205 nl > ((levels - 1) << EVEN_SHIFT) -
206 (1 << (EVEN_SHIFT - 2)))
207 {
208 /* don't add randomness in extreme shadows */
209 }
210 else if ((nl > (3 << (EVEN_SHIFT - 2))))
211 {
212 nl -= (nl + (1 << (EVEN_SHIFT - 2))) & -(1 << (EVEN_SHIFT - 1));
213 if (nl < 0) nl = -nl;
214 if (nl < (1 << (EVEN_SHIFT - 4))) rs--;
215 if (nl < (1 << (EVEN_SHIFT - 5))) rs--;
216 if (nl < (1 << (EVEN_SHIFT - 6))) rs--;
217 }
218 else
219 {
220 if (nl < (3 << (EVEN_SHIFT - 3))) nl += 1 << (EVEN_SHIFT - 2);
221 nl = nl - (1 << (EVEN_SHIFT - 1));
222 if (nl < 0) nl = -nl;
223 if (nl < (1 << (EVEN_SHIFT - 4))) rs--;
224 if (nl < (1 << (EVEN_SHIFT - 5))) rs--;
225 if (nl < (1 << (EVEN_SHIFT - 6))) rs--;
226 }
227 return rs;
228 }
229
230 #ifdef USE_SSE2
231 static eb_ctx_sse2 *
eb_ctx_sse2_new(const EvenBetterParams * params,int start_plane,int end_plane)232 eb_ctx_sse2_new(const EvenBetterParams *params, int start_plane, int end_plane)
233 {
234 int xs = params->source_width;
235 int aspect2 = params->aspect * params->aspect;
236 eb_ctx_sse2 *ctx;
237 int i;
238 double im_scale;
239 float r_mul = 1.0 / (params->aspect * (1 << (6 - params->even_c1_scale)));
240 double rbscale = eb_compute_rbscale(params);
241 int rs_base;
242
243 ctx = (eb_ctx_sse2 *)eb_malloc_aligned(sizeof(eb_ctx_sse2), 16);
244 ctx->xs = xs;
245 for (i = 0; i < 4; i++)
246 {
247 ctx->e[i] = 0.0;
248 ctx->e_i_1[i] = 0.0;
249 ctx->r[i] = 0;
250 ctx->a[i] = 1;
251 ctx->b[i] = aspect2;
252 ctx->ones[i] = 1;
253 ctx->twos[i] = 2;
254 ctx->aspect2[i] = aspect2;
255 ctx->ohi[i] = params->levels - 1;
256 ctx->ehi[i] = 1.1;
257 ctx->elo[i] = -0.1;
258 ctx->r_mul[i] = r_mul;
259 ctx->seed1[i] = (i << 8) + 0x7000;
260 ctx->seed2[i] = (i << 16) + 0x9000;
261 }
262 ctx->kernel[0] = 1.0 / 16;
263 ctx->kernel[1] = 3.0 / 16;
264 ctx->kernel[2] = 5.0 / 16;
265 ctx->kernel[3] = 7.0 / 16;
266
267 im_scale = (params->levels - 1) * 1.0 / (1 << 24);
268 rs_base = 35 - EVEN_SHIFT - params->rand_scale;
269
270 for (i = start_plane; i < end_plane; i++)
271 {
272 float *lut = (float *)malloc((ET_SRC_MAX + 1) * sizeof(float) * 3);
273 int j;
274 ctx->luts[i - start_plane] = lut;
275
276 for (j = 0; j < ET_SRC_MAX + 1; j++)
277 {
278 double g = ((1 << 24) - params->luts[i][j]) * im_scale;
279 int nl, rs;
280
281 lut[j * 3] = g;
282 if (g == 0.0)
283 lut[j * 3 + 1] = 0.5;
284 else
285 lut[j * 3 + 1] = 0.5 - r_mul * rbscale / g;
286
287 nl = (params->levels - 1 - g) * (1 << EVEN_SHIFT);
288 rs = eb_compute_randshift(nl, rs_base,
289 params->do_shadows, params->levels);
290
291 lut[j * 3 + 2] = 1.0 / (1 << EVEN_SHIFT) / (1 << rs);
292 }
293 }
294 for (i = i - start_plane; i < 4; i++)
295 ctx->luts[i] = NULL;
296
297 ctx->iir_line = (int *)eb_malloc_aligned(16 * (xs + 32), 16);
298 ctx->a_line = (int *)eb_malloc_aligned(16 * (xs + 32), 16);
299 ctx->b_line = (int *)eb_malloc_aligned(16 * (xs + 32), 16);
300 ctx->r_line = (int *)eb_malloc_aligned(16 * (xs + 32), 16);
301 for (i = 0; i < (xs + 32) * 4; i++)
302 {
303 ((float *)ctx->iir_line)[i] = 0;
304 ctx->a_line[i] = 1;
305 ctx->b_line[i] = aspect2;
306 ctx->r_line[i] = 0;
307 }
308
309 ctx->skip_line = (char *)malloc((xs + 15) & -16);
310
311 return ctx;
312 }
313
314 static void
eb_ctx_sse2_free(eb_ctx_sse2 * ctx)315 eb_ctx_sse2_free(eb_ctx_sse2 *ctx)
316 {
317 int i;
318
319 for (i = 0; i < 4; i++)
320 free(ctx->luts[i]);
321 eb_free_aligned(ctx->iir_line);
322 eb_free_aligned(ctx->a_line);
323 eb_free_aligned(ctx->b_line);
324 eb_free_aligned(ctx->r_line);
325 free(ctx->skip_line);
326 eb_free_aligned(ctx);
327 }
328 #endif
329
330 #ifdef USE_AVEC
331 static eb_ctx_avec *
eb_ctx_avec_new(const EvenBetterParams * params,int start_plane,int end_plane)332 eb_ctx_avec_new(const EvenBetterParams *params, int start_plane, int end_plane)
333 {
334 int xs = params->source_width;
335 int aspect2 = params->aspect * params->aspect;
336 eb_ctx_avec *ctx;
337 int i;
338 double im_scale;
339 double k;
340 float imscale1, imscale2, rbmul, rsbase;
341 float r_mul = 1.0 / (params->aspect * (1 << (6 - params->even_c1_scale)));
342 double rbscale = eb_compute_rbscale(params);
343 vector unsigned int zero = vec_splat_u32(0);
344 const vector float kernel = { 1.0 / 16, 3.0 / 16, 5.0 / 16, 7.0 / 16 };
345 vector float almostone = { 255.0/256, 255.0/256, 255.0/256, 255.0/256 };
346 int rs_base;
347
348 ctx = (eb_ctx_avec *)eb_malloc_aligned(sizeof(eb_ctx_avec), 16);
349 ctx->xs = xs;
350
351 ctx->e = (vector float) zero;
352 ctx->e_i_1 = (vector float) zero;
353 ctx->r = zero;
354 ctx->a = zero;
355 im_scale = (params->levels - 1) * (1.0 / (1 << 24));
356 rs_base = 35 - EVEN_SHIFT - params->rand_scale;
357
358 if (params->gamma == 1.0)
359 k = 0;
360 else if (params->gamma == 1.8)
361 k = 0.835;
362 else if (params->gamma == 2.0)
363 k = 1.0;
364 else
365 /* this shouldn't happen! */
366 k = 0;
367
368 for (;;)
369 {
370 vector float foff, f0, f1;
371
372 imscale1 = (1 - k) * (params->levels - 1) * (256.0 / 255.0);
373 imscale2 = k * (params->levels - 1) * sqrt(256.0 / 255.0);
374 for (i = 0; i < 4; i++)
375 {
376 ((float *)&ctx->imscale1)[i] = imscale1;
377 ((float *)&ctx->imscale2)[i] = imscale2;
378 }
379 f0 = vec_rsqrte(almostone);
380 f0 = vec_madd(f0, almostone, (vector float)zero);
381 f1 = vec_madd(f0, ctx->imscale2, (vector float)zero);
382 foff = vec_madd(almostone, ctx->imscale1, f1);
383 f1 = vec_nmsub(f0, ctx->imscale2, foff);
384 f1 = vec_nmsub(almostone, ctx->imscale1, f1);
385 if (vec_all_eq(f1, (vector float)zero))
386 {
387 ctx->foff = foff;
388 break;
389 }
390 k += 1e-5;
391 }
392 rbmul = -r_mul * rbscale;
393 rsbase = 1.0 / (1 << EVEN_SHIFT) / (1 << rs_base);
394 for (i = 0; i < 4; i++)
395 {
396 ((int *)&ctx->b)[i] = aspect2;
397 ((int *)&ctx->aspect2)[i] = aspect2;
398 ((int *)&ctx->seed1)[i] = (i << 8) + 0x7000;
399 ((int *)&ctx->seed2)[i] = (i << 16) + 0x9000;
400 ((float *)&ctx->ohi)[i] = params->levels - 1;
401 ((float *)&ctx->ehi)[i] = 1.1;
402 ((float *)&ctx->elo)[i] = -0.1;
403 ((float *)&ctx->r_mul)[i] = r_mul;
404 ((float *)&ctx->rsbase)[i] = rsbase;
405 ((float *)&ctx->rbmul)[i] = rbmul;
406 }
407 ctx->kernel = kernel;
408
409 rs_base = 35 - EVEN_SHIFT - params->rand_scale;
410
411 for (i = start_plane; i < end_plane; i++)
412 {
413 float *lut = (float *)malloc((ET_SRC_MAX + 1) * sizeof(float) * 3);
414 int j;
415 ctx->luts[i - start_plane] = lut;
416
417 for (j = 0; j < ET_SRC_MAX + 1; j++)
418 {
419 double g = ((1 << 24) - params->luts[i][j]) * im_scale;
420 int nl, rs;
421
422 lut[j * 3] = g;
423 if (g == 0.0)
424 lut[j * 3 + 1] = 0.5;
425 else
426 lut[j * 3 + 1] = 0.5 - r_mul * rbscale / g;
427 nl = (params->levels - 1 - g) * (1 << EVEN_SHIFT);
428 rs = eb_compute_randshift(nl, rs_base,
429 params->do_shadows, params->levels);
430
431 lut[j * 3 + 2] = 1.0 / (1 << EVEN_SHIFT) / (1 << rs);
432 }
433 }
434 for (i = i - start_plane; i < 4; i++)
435 ctx->luts[i] = NULL;
436
437 ctx->iir_line = (vector float *)eb_malloc_aligned(16 * (xs + 32), 16);
438 ctx->a_line = (vector unsigned int *)eb_malloc_aligned(16 * (xs + 32), 16);
439 ctx->b_line = (vector unsigned int *)eb_malloc_aligned(16 * (xs + 32), 16);
440 ctx->r_line = (vector unsigned int *)eb_malloc_aligned(16 * (xs + 32), 16);
441 for (i = 0; i < (xs + 32) * 4; i++)
442 {
443 ((float *)ctx->iir_line)[i] = 0;
444 ((int *)ctx->a_line)[i] = 1;
445 ((int *)ctx->b_line)[i] = aspect2;
446 ((int *)ctx->r_line)[i] = 0;
447 }
448
449 ctx->skip_line = (char *)malloc((xs + 15) & -16);
450
451 return ctx;
452 }
453
454 static void
eb_ctx_avec_free(eb_ctx_avec * ctx)455 eb_ctx_avec_free(eb_ctx_avec *ctx)
456 {
457 int i;
458
459 for (i = 0; i < 4; i++)
460 free(ctx->luts[i]);
461 eb_free_aligned(ctx->iir_line);
462 eb_free_aligned(ctx->a_line);
463 eb_free_aligned(ctx->b_line);
464 eb_free_aligned(ctx->r_line);
465 free(ctx->skip_line);
466 eb_free_aligned(ctx);
467 }
468
469 #endif
470
471 #ifdef USE_VECTOR
472 static int
even_better_line_vector(EvenBetterCtx * ebc,uchar ** dest,const ET_Rll * const * src)473 even_better_line_vector(EvenBetterCtx *ebc, uchar **dest,
474 const ET_Rll *const *src)
475 {
476 int n_planes = ebc->n_planes;
477 int xd = ebc->dest_width;
478 int strip;
479 eb_srcbuf sb_alloc;
480 eb_srcbuf *srcbuf;
481 uchar dummy_a[32];
482 uchar *dummy_dst = (uchar *)(((int)dummy_a + 15) & -16);
483 #ifdef USE_SSE2
484 int save_mxcsr = eb_sse2_set_daz();
485 #endif
486
487 srcbuf = (eb_srcbuf *)(((int)&sb_alloc + 12) & -16);
488
489 for (strip = 0; strip < n_planes; strip += 4)
490 {
491 #ifdef USE_AVEC
492 eb_ctx_avec *ctx = ebc->avec_ctx[strip >> 2];
493 #endif
494 #ifdef USE_SSE2
495 eb_ctx_sse2 *ctx = ebc->sse2_ctx[strip >> 2];
496 #endif
497 uchar *destbufs[4];
498 const ET_Rll *const *sbuf = src + strip;
499 int count[4];
500 int src_idx[4];
501 int plane_idx, last_plane;
502 float im[4], rb[4], rs[4];
503 int i;
504
505 last_plane = n_planes - strip < 4 ? n_planes - strip : 4;
506 for (plane_idx = 0; plane_idx < last_plane; plane_idx++)
507 {
508 count[plane_idx] = 0;
509 src_idx[plane_idx] = 0;
510 destbufs[plane_idx] = dest[plane_idx + strip];
511 }
512 for (; plane_idx < 4; plane_idx++)
513 {
514 int j;
515
516 for (j = 0; j < 16; j++)
517 {
518 ((float *)&srcbuf->im)[j * 4 + plane_idx] = 0.0;
519 ((float *)&srcbuf->rb)[j * 4 + plane_idx] = 0.0;
520 ((float *)&srcbuf->rs)[j * 4 + plane_idx] = 0.0;
521 }
522 }
523 for (i = 0; i < xd; i += 16)
524 {
525 int jmax = (xd - i) > 16 ? 16 : xd - i;
526 int skip = 1;
527 int j;
528
529 for (plane_idx = 0; plane_idx < last_plane; plane_idx++)
530 {
531 if (count[plane_idx] < 16 || im[plane_idx] != 0.0)
532 {
533 skip = 0;
534 break;
535 }
536 }
537 ctx->skip_line[i >> 4] = skip;
538
539 if (skip)
540 {
541 /* all white */
542
543 for (plane_idx = 0; plane_idx < last_plane; plane_idx++)
544 {
545 uchar *dst_ptr = destbufs[plane_idx];
546 if (jmax == 16)
547 {
548 ((uint32 *)dst_ptr)[(i >> 2) + 0] = 0;
549 ((uint32 *)dst_ptr)[(i >> 2) + 1] = 0;
550 ((uint32 *)dst_ptr)[(i >> 2) + 2] = 0;
551 ((uint32 *)dst_ptr)[(i >> 2) + 3] = 0;
552 }
553 else
554 {
555 for (j = 0; j < jmax; j++)
556 dst_ptr[i + j] = 0;
557 }
558 count[plane_idx] -= jmax;
559 }
560 }
561 else
562 {
563 for (plane_idx = 0; plane_idx < last_plane; plane_idx++)
564 {
565 const float *lut = ctx->luts[plane_idx];
566 float imp = im[plane_idx];
567 float rbp = rb[plane_idx];
568 float rsp = rs[plane_idx];
569 for (j = 0; j < jmax; j++)
570 {
571 if (count[plane_idx] == 0)
572 {
573 const ET_Rll *src_p = sbuf[plane_idx] +
574 src_idx[plane_idx]++;
575 ET_SrcPixel src_pixel = src_p->value;
576 count[plane_idx] = src_p->length;
577 imp = lut[src_pixel * 3];
578 rbp = lut[src_pixel * 3 + 1];
579 rsp = lut[src_pixel * 3 + 2];
580 }
581 ((float *)&srcbuf->im)[j * 4 + plane_idx] = imp;
582 ((float *)&srcbuf->rb)[j * 4 + plane_idx] = rbp;
583 ((float *)&srcbuf->rs)[j * 4 + plane_idx] = rsp;
584 count[plane_idx]--;
585 }
586 im[plane_idx] = imp;
587 rb[plane_idx] = rbp;
588 rs[plane_idx] = rsp;
589 }
590 for (; plane_idx < 4; plane_idx++)
591 {
592 destbufs[plane_idx] = dummy_dst - i;
593 }
594 #ifdef USE_AVEC
595 eb_avec_core(ctx, (vector unsigned char **)destbufs, srcbuf, i);
596 #endif
597 #ifdef USE_SSE2
598 eb_sse2_core(ctx, destbufs, srcbuf, i);
599 #endif
600 }
601 }
602
603 for (i = xd & -16; i >= 0; i -= 16)
604 {
605 if (!ctx->skip_line[i >> 4])
606 {
607 #ifdef USE_AVEC
608 eb_avec_rev_rs(ctx, i + 15);
609 #endif
610 #ifdef USE_SSE2
611 eb_sse2_rev_rs(ctx, i + 15);
612 #endif
613 }
614 }
615 }
616 #ifdef USE_SSE2
617 eb_sse2_restore_daz(save_mxcsr);
618 #endif
619 return 0;
620 }
621 #endif
622
623 #ifdef USE_AVEC
624 static int
even_better_line_fastprep(EvenBetterCtx * ebc,uchar ** dest,const ET_SrcPixel * const * src)625 even_better_line_fastprep(EvenBetterCtx *ebc, uchar **dest,
626 const ET_SrcPixel *const *src)
627 {
628 int n_planes = ebc->n_planes;
629 int xd = ebc->dest_width;
630 int strip;
631 eb_srcbuf sb_alloc;
632 eb_srcbuf *srcbuf;
633 uchar dummy_a[32];
634 uchar *dummy_dst = (uchar *)(((int)dummy_a + 15) & -16);
635
636 srcbuf = (eb_srcbuf *)(((int)&sb_alloc + 12) & -16);
637
638 for (strip = 0; strip < n_planes; strip += 4)
639 {
640 #ifdef USE_AVEC
641 eb_ctx_avec *ctx = ebc->avec_ctx[strip >> 2];
642 #endif
643 #ifdef USE_SSE2
644 eb_ctx_sse2 *ctx = ebc->sse2_ctx[strip >> 2];
645 #endif
646 uchar *destbufs[4];
647 const ET_SrcPixel *const *sbuf = src + strip;
648 int plane_idx, last_plane;
649 int i;
650
651 last_plane = n_planes - strip < 4 ? n_planes - strip : 4;
652 for (plane_idx = 0; plane_idx < last_plane; plane_idx++)
653 {
654 destbufs[plane_idx] = dest[plane_idx + strip];
655 }
656 for (i = 0; i < xd; i += 16)
657 {
658 int noskip;
659 noskip = eb_avec_prep_srcbuf(ctx, last_plane, srcbuf, sbuf, i);
660 ctx->skip_line[i >> 4] = noskip;
661 if (noskip)
662 {
663 for (plane_idx = last_plane; plane_idx < 4; plane_idx++)
664 destbufs[plane_idx] = dummy_dst - i;
665 eb_avec_core(ctx, (vector unsigned char **)destbufs, srcbuf, i);
666 }
667 else
668 {
669 /* all white */
670
671 for (plane_idx = 0; plane_idx < last_plane; plane_idx++)
672 {
673 uchar *dst_ptr = destbufs[plane_idx];
674 ((uint32 *)dst_ptr)[(i >> 2) + 0] = 0;
675 ((uint32 *)dst_ptr)[(i >> 2) + 1] = 0;
676 ((uint32 *)dst_ptr)[(i >> 2) + 2] = 0;
677 ((uint32 *)dst_ptr)[(i >> 2) + 3] = 0;
678 }
679 }
680 }
681
682 for (i = xd & -16; i >= 0; i -= 16)
683 {
684 if (ctx->skip_line[i >> 4])
685 {
686 #ifdef USE_AVEC
687 eb_avec_rev_rs(ctx, i + 15);
688 #endif
689 #ifdef USE_SSE2
690 eb_sse2_rev_rs(ctx, i + 15);
691 #endif
692 }
693 }
694 }
695 return 0;
696 }
697 #endif
698
699 /* Maximum number of planes, but actually we want to dynamically
700 allocate all scratch buffers that depend on this. */
701 #define M 16
702
703 static void
even_better_line_hi(EvenBetterCtx * ebc,uchar ** dest,const ET_Rll * const * src)704 even_better_line_hi (EvenBetterCtx *ebc, uchar **dest,
705 const ET_Rll *const *src)
706 {
707 int a[M], b[M];
708 int e_1_0[M], e_m1_1[M], e_0_1[M], e_1_1[M];
709 int iml[M], rbl[M];
710 int i, j;
711 int im;
712 int *pa, *pb, *piir, *pr;
713 int r[M], rg;
714 int xd;
715 uint32 seed1 = ebc->seed1;
716 uint32 seed2 = ebc->seed2;
717 uint32 sum;
718 int plane_idx;
719 int r_scratch[M];
720 int n_planes = ebc->n_planes;
721 int levels = ebc->levels;
722 #ifdef OLD_QUANT
723 int dith_mul = levels << 8;
724 #else
725 int dith_mul = (levels - 1) << 8;
726 #endif
727 int imo_mul = (1 << (EVEN_SHIFT + IMO_SHIFT)) / (levels - 1);
728 int aspect2 = ebc->aspect * ebc->aspect;
729 int *strengths = ebc->strengths;
730 int even_elo = ebc->even_elo;
731 int even_ehi = ebc->even_ehi;
732 int coupling;
733 int *c_line = ebc->c_line;
734 int even_c1 = ebc->even_c1;
735 int rand_shift;
736 int even_rlimit = 1 << (30 - EVEN_SHIFT + even_c1);
737 int count[M], src_idx[M];
738 int rs[M];
739
740 xd = ebc->dest_width;
741
742 memset(rbl, 0x00, M * sizeof(int));
743 memset(iml, 0x00, M * sizeof(int));
744 memset(rs, 0x00, M * sizeof(int));
745
746 for (plane_idx = 0; plane_idx < n_planes; plane_idx++)
747 {
748 a[plane_idx] = 1;
749 b[plane_idx] = aspect2;
750 r[plane_idx] = 0;
751 e_0_1[plane_idx] = 0;
752 e_1_0[plane_idx] = 0;
753 e_1_1[plane_idx] = 0;
754 count[plane_idx] = 0;
755 src_idx[plane_idx] = 0;
756 }
757
758 coupling = 0;
759
760 for (i = 0; i < xd;)
761 {
762 int work_planes[M];
763 int n_work = 0;
764 int work_idx;
765 int jmax;
766
767 jmax = (xd - i) > 16 ? 16 : xd - i;
768
769 for (plane_idx = 0; plane_idx < n_planes; plane_idx++)
770 {
771 EBPlaneCtx *ctx = ebc->plane_ctx[plane_idx];
772 int *wcl = ctx->white_count_line;
773 if (count[plane_idx] >= 16 && iml[plane_idx] == 0)
774 wcl[i >> 4]++;
775 else
776 wcl[i >> 4] = 0;
777 if (wcl[i >> 4] > 15)
778 {
779 uchar *dst_ptr = dest[plane_idx];
780 if (jmax == 16)
781 {
782 ((uint32 *)dst_ptr)[(i >> 2) + 0] = 0;
783 ((uint32 *)dst_ptr)[(i >> 2) + 1] = 0;
784 ((uint32 *)dst_ptr)[(i >> 2) + 2] = 0;
785 ((uint32 *)dst_ptr)[(i >> 2) + 3] = 0;
786 }
787 else
788 {
789 for (j = 0; j < jmax; j++)
790 dst_ptr[i + j] = 0;
791 }
792 count[plane_idx] -= jmax;
793 }
794 else
795 {
796 work_planes[n_work++] = plane_idx;
797 }
798 }
799
800 if (n_work == 0)
801 {
802 /* all planes were white */
803 i += jmax;
804 continue;
805 }
806
807 for (j = 0; j < jmax; j++)
808 {
809 #ifdef FANCY_COUPLING
810 coupling += c_line[i];
811 #else
812 coupling = 0;
813 #endif
814 /* Lookup image data and compute R for all planes. */
815 for (work_idx = 0; work_idx < n_work; work_idx++)
816 {
817 int plane_idx = work_planes[work_idx];
818 EBPlaneCtx *ctx = ebc->plane_ctx[plane_idx];
819 ET_SrcPixel src_pixel;
820 int new_r;
821
822 pr = ctx->r_line;
823 pa = ctx->a_line;
824 pb = ctx->b_line;
825 if (count[plane_idx] == 0)
826 {
827 const ET_Rll *src_p = src[plane_idx] + src_idx[plane_idx]++;
828 int *lut = ctx->lut;
829 int *rblut = ctx->rb_lut;
830 char *rslut = ctx->rs_lut;
831
832 count[plane_idx] = src_p->length;
833 src_pixel = src_p->value;
834 iml[plane_idx] = lut[src_pixel];
835 rbl[plane_idx] = rblut[src_pixel];
836 rs[plane_idx] = rslut[src_pixel];
837 }
838 count[plane_idx]--;
839
840 if (r[plane_idx] + a[plane_idx] < pr[i])
841 {
842 r[plane_idx] += a[plane_idx];
843 a[plane_idx] += 2;
844 }
845 else
846 {
847 a[plane_idx] = pa[i];
848 b[plane_idx] = pb[i];
849 r[plane_idx] = pr[i];
850 }
851 if (iml[plane_idx] == 0)
852 {
853 r_scratch[plane_idx] = 0;
854 }
855 else
856 {
857 int r_tmp;
858 const int r_max = 0;
859 new_r = r[plane_idx];
860 if (new_r > even_rlimit)
861 new_r = even_rlimit;
862 /* Should we store back with the limit? */
863
864 rg = new_r << (EVEN_SHIFT - even_c1);
865 r_tmp = rg - rbl[plane_idx];
866 if (r_tmp > r_max) r_tmp >>= 3;
867 r_scratch[plane_idx] = r_tmp;
868 }
869 }
870
871 /* Dither each plane. */
872 for (work_idx = 0; work_idx < n_work; work_idx++)
873 {
874 int plane_idx = work_planes[work_idx];
875 EBPlaneCtx *ctx = ebc->plane_ctx[plane_idx];
876 uchar *dst_ptr = dest[plane_idx];
877 int new_e_1_0;
878 int coupling_contribution;
879
880 pr = ctx->r_line;
881 pa = ctx->a_line;
882 pb = ctx->b_line;
883 piir = ctx->iir_line;
884
885 im = iml[plane_idx];
886 e_m1_1[plane_idx] = e_0_1[plane_idx];
887 e_0_1[plane_idx] = e_1_1[plane_idx];
888 e_1_1[plane_idx] = i == xd - 1 ? 0 : piir[i + 1];
889 new_e_1_0 = ((e_1_0[plane_idx] * 7 + e_m1_1[plane_idx] * 3 +
890 e_0_1[plane_idx] * 5 + e_1_1[plane_idx] * 1) >> 4);
891 if (im == 0)
892 {
893 dst_ptr[i] = 0;
894 }
895 else
896 {
897 int err;
898 int imo;
899
900 err = new_e_1_0;
901
902 err += r_scratch[plane_idx];
903
904 /* Add the two seeds together */
905 sum = seed1 + seed2;
906
907 /* If the add generated a carry, increment
908 * the result of the addition.
909 */
910 if (sum < seed1 || sum < seed2) sum++;
911
912 /* Seed2 becomes old seed1, seed1 becomes result */
913 seed2 = seed1;
914 seed1 = sum;
915
916 rand_shift = rs[plane_idx];
917 err -= (sum >> rand_shift) - (0x80000000 >> rand_shift);
918
919 if (err < even_elo)
920 err = even_elo;
921
922 else if (err > even_ehi)
923 err = even_ehi;
924
925 #if 1
926 err += coupling;
927 #endif
928
929 #ifdef OLD_QUANT
930 imo = ((err + im) * dith_mul) >> (EVEN_SHIFT + 8);
931 #else
932 imo = ((err + im) * dith_mul + (1 << (EVEN_SHIFT + 7))) >> (EVEN_SHIFT + 8);
933 #endif
934 if (imo < 0) imo = 0;
935 else if (imo > levels - 1) imo = levels - 1;
936 dst_ptr[i] = imo;
937 coupling_contribution = im - ((imo * imo_mul) >> IMO_SHIFT);
938 new_e_1_0 += coupling_contribution;
939 coupling += (coupling_contribution * strengths[plane_idx]) >> 8;
940 }
941 if (dst_ptr[i] != 0)
942 {
943 a[plane_idx] = 1;
944 b[plane_idx] = aspect2;
945 r[plane_idx] = 0;
946 }
947 pa[i] = a[plane_idx];
948 pb[i] = b[plane_idx];
949 pr[i] = r[plane_idx];
950 piir[i] = new_e_1_0;
951 e_1_0[plane_idx] = new_e_1_0;
952 }
953 #ifdef FANCY_COUPLING
954 coupling = coupling >> 1;
955 c_line[i] = coupling;
956 #endif
957 i++;
958 }
959 }
960
961 /* Note: this isn't white optimized, but the payoff is probably not
962 that important. */
963 #ifdef FANCY_COUPLING
964 coupling = 0;
965 for (i = xd - 1; i >= 0; i--)
966 {
967 coupling = (coupling + c_line[i]) >> 1;
968 c_line[i] = (coupling - (coupling >> 4));
969 }
970 #endif
971
972 /* Update distances. */
973 for (plane_idx = 0; plane_idx < n_planes; plane_idx++)
974 {
975 EBPlaneCtx *ctx = ebc->plane_ctx[plane_idx];
976 int *wcl = ctx->white_count_line;
977 int av, bv, rv;
978 int jmax;
979
980 pr = ctx->r_line;
981 pa = ctx->a_line;
982 pb = ctx->b_line;
983
984 av = 1;
985 bv = 1;
986 rv = 0;
987 jmax = ((xd - 1) & 15) + 1;
988 for (i = xd - 1; i >= 0;)
989 {
990 if (wcl[i >> 4] < 16)
991 {
992 for (j = 0; j < jmax; j++)
993 {
994 if (rv + bv + av < pr[i] + pb[i])
995 {
996 rv += av;
997 av += 2;
998 }
999 else
1000 {
1001 rv = pr[i];
1002 av = pa[i];
1003 bv = pb[i];
1004 }
1005 if (rv > even_rlimit) rv = even_rlimit;
1006 pa[i] = av;
1007 pb[i] = bv + (aspect2 << 1);
1008 pr[i] = rv + bv;
1009 i--;
1010 }
1011 }
1012 else
1013 i -= jmax;
1014 jmax = 16;
1015 }
1016 }
1017
1018 ebc->seed1 = seed1;
1019 ebc->seed2 = seed2;
1020 }
1021
1022 static void
even_better_line_both(EvenBetterCtx * ebc,uchar ** dest,const ET_Rll * const * src)1023 even_better_line_both (EvenBetterCtx *ebc, uchar **dest,
1024 const ET_Rll *const *src)
1025 {
1026 #if 0
1027 int a[M], b[M];
1028 int a_sh[M], b_sh[M];
1029 int e_1_0[M], e_m1_1[M], e_0_1[M], e_1_1[M];
1030 int imraw[M];
1031 int iml[M];
1032 int i;
1033 int im;
1034 int *lut;
1035 const ET_SrcPixel *ps;
1036 int *pa, *pb, *piir, *pr;
1037 int *pa_sh, *pb_sh, *pr_sh;
1038 int r[M], rb, rg;
1039 int r_sh[M];
1040 int *rblut;
1041 int xd, xrem, xs;
1042 uint32 seed1 = ebc->seed1;
1043 uint32 seed2 = ebc->seed2;
1044 uint32 sum;
1045 int plane_idx;
1046 int r_scratch[M];
1047 int src_idx;
1048 int n_planes = ebc->n_planes;
1049 int levels = ebc->levels;
1050 #ifdef OLD_QUANT
1051 int dith_mul = levels << 8;
1052 #else
1053 int dith_mul = (levels - 1) << 8;
1054 #endif
1055 int imo_mul = (1 << (EVEN_SHIFT + IMO_SHIFT)) / (levels - 1);
1056 int aspect2 = ebc->aspect * ebc->aspect;
1057 int *strengths = ebc->strengths;
1058 int even_elo= ebc->even_elo;
1059 int even_ehi= ebc->even_ehi;
1060 int coupling;
1061 int *c_line = ebc->c_line;
1062 int even_c1 = ebc->even_c1;
1063 int rand_shift = ebc->rand_shift;
1064 int even_rlimit = 1 << (30 - EVEN_SHIFT + even_c1);
1065
1066 xs = ebc->source_width;
1067 xd = ebc->dest_width;
1068 xrem = xd - xs;
1069
1070 for (plane_idx = 0; plane_idx < n_planes; plane_idx++)
1071 {
1072 a[plane_idx] = 1;
1073 b[plane_idx] = aspect2;
1074 a_sh[plane_idx] = 1;
1075 b_sh[plane_idx] = aspect2;
1076 r[plane_idx] = 0;
1077 r_sh[plane_idx] = 0;
1078 e_0_1[plane_idx] = 0;
1079 e_1_0[plane_idx] = 0;
1080 e_1_1[plane_idx] = 0;
1081 }
1082
1083 coupling = 0;
1084
1085 src_idx = 0;
1086 for (i = 0; i < xd; i++)
1087 {
1088 #ifdef FANCY_COUPLING
1089 coupling += c_line[i];
1090 #else
1091 coupling = 0;
1092 #endif
1093
1094 xrem += xs;
1095 if (xrem >= xd)
1096 {
1097 for (plane_idx = 0; plane_idx < n_planes; plane_idx++)
1098 {
1099 ps = src[plane_idx];
1100 imraw[plane_idx] = ps[src_idx];
1101 }
1102 src_idx++;
1103 xrem -= xd;
1104 }
1105
1106 /* Lookup image data and compute R for all planes. */
1107 for (plane_idx = 0; plane_idx < n_planes; plane_idx++)
1108 {
1109 EBPlaneCtx *ctx = ebc->plane_ctx[plane_idx];
1110 ET_SrcPixel src_pixel;
1111 int new_r;
1112
1113 pr = ctx->r_line;
1114 pa = ctx->a_line;
1115 pb = ctx->b_line;
1116 pr_sh = ctx->r_line_sh;
1117 pa_sh = ctx->a_line_sh;
1118 pb_sh = ctx->b_line_sh;
1119 lut = ctx->lut;
1120 rblut = ctx->rb_lut;
1121 src_pixel = imraw[plane_idx];
1122
1123 im = lut[src_pixel];
1124 iml[plane_idx] = im;
1125 rb = rblut[src_pixel];
1126 if (r[plane_idx] + a[plane_idx] < pr[i])
1127 {
1128 r[plane_idx] += a[plane_idx];
1129 a[plane_idx] += 2;
1130 }
1131 else
1132 {
1133 a[plane_idx] = pa[i];
1134 b[plane_idx] = pb[i];
1135 r[plane_idx] = pr[i];
1136 }
1137 if (r_sh[plane_idx] + a_sh[plane_idx] < pr_sh[i])
1138 {
1139 r_sh[plane_idx] += a_sh[plane_idx];
1140 a_sh[plane_idx] += 2;
1141 }
1142 else
1143 {
1144 a_sh[plane_idx] = pa_sh[i];
1145 b_sh[plane_idx] = pb_sh[i];
1146 r_sh[plane_idx] = pr_sh[i];
1147 }
1148 if (im == 0 || im == (1 << EVEN_SHIFT))
1149 {
1150 r_scratch[plane_idx] = 0;
1151 }
1152 else
1153 {
1154 new_r = r[plane_idx];
1155 if (new_r > even_rlimit)
1156 new_r = even_rlimit;
1157 /* Should we store back with the limit? */
1158 rg = new_r << (EVEN_SHIFT - even_c1);
1159
1160 new_r = r_sh[plane_idx];
1161 if (new_r > even_rlimit)
1162 new_r = even_rlimit;
1163 rg -= new_r << (EVEN_SHIFT - even_c1);
1164 r_scratch[plane_idx] = rg - rb;
1165 }
1166 }
1167
1168 /* Dither each plane. */
1169 for (plane_idx = 0; plane_idx < n_planes; plane_idx++)
1170 {
1171 EBPlaneCtx *ctx = ebc->plane_ctx[plane_idx];
1172 uchar *dst_ptr = dest[plane_idx];
1173 int new_e_1_0;
1174 int coupling_contribution;
1175
1176 pr = ctx->r_line;
1177 pa = ctx->a_line;
1178 pb = ctx->b_line;
1179 pr_sh = ctx->r_line_sh;
1180 pa_sh = ctx->a_line_sh;
1181 pb_sh = ctx->b_line_sh;
1182 piir = ctx->iir_line;
1183
1184 im = iml[plane_idx];
1185 e_m1_1[plane_idx] = e_0_1[plane_idx];
1186 e_0_1[plane_idx] = e_1_1[plane_idx];
1187 e_1_1[plane_idx] = i == xd - 1 ? 0 : piir[i + 1];
1188 new_e_1_0 = ((e_1_0[plane_idx] * 7 + e_m1_1[plane_idx] * 3 +
1189 e_0_1[plane_idx] * 5 + e_1_1[plane_idx] * 1) >> 4);
1190 if (im == 0)
1191 {
1192 dst_ptr[i] = 0;
1193 }
1194 else
1195 {
1196 int err;
1197 int imo;
1198
1199 err = new_e_1_0;
1200
1201 err += r_scratch[plane_idx];
1202
1203 /* Add the two seeds together */
1204 sum = seed1 + seed2;
1205
1206 /* If the add generated a carry, increment
1207 * the result of the addition.
1208 */
1209 if (sum < seed1 || sum < seed2) sum++;
1210
1211 /* Seed2 becomes old seed1, seed1 becomes result */
1212 seed2 = seed1;
1213 seed1 = sum;
1214
1215 err -= (sum >> rand_shift) - (0x80000000 >> rand_shift);
1216
1217 if (err < even_elo)
1218 err = even_elo;
1219
1220 else if (err > even_ehi)
1221 err = even_ehi;
1222
1223 #if 1
1224 err += coupling;
1225 #endif
1226
1227 #ifdef OLD_QUANT
1228 imo = ((err + im) * dith_mul) >> (EVEN_SHIFT + 8);
1229 #else
1230 imo = ((err + im) * dith_mul + (1 << (EVEN_SHIFT + 7))) >> (EVEN_SHIFT + 8);
1231 #endif
1232 if (imo < 0) imo = 0;
1233 else if (imo > levels - 1) imo = levels - 1;
1234 dst_ptr[i] = imo;
1235 coupling_contribution = im - ((imo * imo_mul) >> IMO_SHIFT);
1236 new_e_1_0 += coupling_contribution;
1237 coupling += (coupling_contribution * strengths[plane_idx]) >> 8;
1238 }
1239 if (dst_ptr[i] != 0)
1240 {
1241 a[plane_idx] = 1;
1242 b[plane_idx] = aspect2;
1243 r[plane_idx] = 0;
1244 }
1245 if (dst_ptr[i] != levels - 1)
1246 {
1247 a_sh[plane_idx] = 1;
1248 b_sh[plane_idx] = aspect2;
1249 r_sh[plane_idx] = 0;
1250 }
1251 pa[i] = a[plane_idx];
1252 pb[i] = b[plane_idx];
1253 pr[i] = r[plane_idx];
1254 pa_sh[i] = a_sh[plane_idx];
1255 pb_sh[i] = b_sh[plane_idx];
1256 pr_sh[i] = r_sh[plane_idx];
1257 piir[i] = new_e_1_0;
1258 e_1_0[plane_idx] = new_e_1_0;
1259 }
1260 #ifdef FANCY_COUPLING
1261 coupling = coupling >> 1;
1262 c_line[i] = coupling;
1263 #endif
1264 }
1265
1266 #ifdef FANCY_COUPLING
1267 coupling = 0;
1268 for (i = xd - 1; i >= 0; i--)
1269 {
1270 if (plane_idx == 0)
1271 {
1272 coupling = (coupling + c_line[i]) >> 1;
1273 c_line[i] = (coupling - (coupling >> 4));
1274 }
1275 }
1276 #endif
1277
1278 /* Update distances. */
1279 for (plane_idx = 0; plane_idx < n_planes; plane_idx++)
1280 {
1281 EBPlaneCtx *ctx = ebc->plane_ctx[plane_idx];
1282 int av, bv, rv;
1283 int av_sh, bv_sh, rv_sh;
1284
1285 pr = ctx->r_line;
1286 pa = ctx->a_line;
1287 pb = ctx->b_line;
1288 pr_sh = ctx->r_line_sh;
1289 pa_sh = ctx->a_line_sh;
1290 pb_sh = ctx->b_line_sh;
1291
1292 av = 1;
1293 bv = 1;
1294 rv = 0;
1295 av_sh = 1;
1296 bv_sh = 1;
1297 rv_sh = 0;
1298 for (i = xd - 1; i >= 0; i--)
1299 {
1300 if (rv + bv + av < pr[i] + pb[i])
1301 {
1302 rv += av;
1303 av += 2;
1304 }
1305 else
1306 {
1307 rv = pr[i];
1308 av = pa[i];
1309 bv = pb[i];
1310 }
1311 if (rv > even_rlimit) rv = even_rlimit;
1312 pa[i] = av;
1313 pb[i] = bv + (aspect2 << 1);
1314 pr[i] = rv + bv;
1315
1316 if (rv_sh + bv_sh + av_sh < pr_sh[i] + pb_sh[i])
1317 {
1318 rv_sh += av_sh;
1319 av_sh += 2;
1320 }
1321 else
1322 {
1323 rv_sh = pr_sh[i];
1324 av_sh = pa_sh[i];
1325 bv_sh = pb_sh[i];
1326 }
1327 if (rv_sh > even_rlimit) rv_sh = even_rlimit;
1328 pa_sh[i] = av_sh;
1329 pb_sh[i] = bv_sh + (aspect2 << 1);
1330 pr_sh[i] = rv_sh + bv_sh;
1331 }
1332 }
1333
1334 ebc->seed1 = seed1;
1335 ebc->seed2 = seed2;
1336 #endif
1337 }
1338
1339 /**
1340 * even_better_line_rll: Screen a line using Even ToneFS screeing.
1341 * @ctx: An #EBPlaneCtx context.
1342 * @dest: Array of destination buffers, 8 bpp pixels each.
1343 * @src: Array of source buffers, runlength encoded.
1344 *
1345 * Screens a single line using Even ToneFS screening.
1346 **/
1347 void
even_better_line_rll(EvenBetterCtx * ebc,uchar ** dest,const ET_Rll * const * src)1348 even_better_line_rll (EvenBetterCtx *ebc, uchar **dest,
1349 const ET_Rll *const *src)
1350 {
1351
1352 if (ebc->dump_file && ebc->dump_level >= EB_DUMP_INPUT)
1353 {
1354 int i;
1355
1356 /* Note: we should calculate the actual number of runlength
1357 codes here. As it is, it will just waste storage a bit. */
1358 for (i = 0; i < ebc->n_planes; i++)
1359 fwrite (src[i], sizeof(ET_Rll), ebc->source_width,
1360 ebc->dump_file);
1361 }
1362 #ifdef USE_VECTOR
1363 if (ebc->using_vectors)
1364 even_better_line_vector(ebc, dest, src);
1365 else
1366 #endif
1367 if (ebc->do_shadows)
1368 even_better_line_both (ebc, dest, src);
1369 else
1370 even_better_line_hi (ebc, dest, src);
1371 if (ebc->dump_file && ebc->dump_level >= EB_DUMP_INPUT)
1372 {
1373 int i;
1374
1375 for (i = 0; i < ebc->n_planes; i++)
1376 fwrite (dest[i], 1, ebc->dest_width,
1377 ebc->dump_file);
1378 }
1379 }
1380
1381 /**
1382 * even_better_compress_rll: Compress a single scan line to RLL format.
1383 * @dst: Destination buffer.
1384 * @src: Source buffer.
1385 * @width: Number of source pixels.
1386 *
1387 * Return value: number of runlength codes.
1388 **/
1389 static int
even_better_compress_rll(ET_Rll * dst,const ET_SrcPixel * src,int src_width,int dst_width)1390 even_better_compress_rll (ET_Rll *dst, const ET_SrcPixel *src,
1391 int src_width, int dst_width)
1392 {
1393 int rll_idx;
1394 int i;
1395 int count;
1396 ET_SrcPixel last_val;
1397 int whole = dst_width / src_width;
1398 int frac = dst_width % src_width;
1399 int rem;
1400
1401 rll_idx = 0;
1402 last_val = src[0];
1403 count = whole;
1404 if (frac == 0)
1405 {
1406 for (i = 1; i < src_width; i++)
1407 {
1408 ET_SrcPixel val = src[i];
1409
1410 if (count > 0xffff - whole || val != last_val)
1411 {
1412 dst[rll_idx].length = count;
1413 dst[rll_idx].value = last_val;
1414 rll_idx++;
1415 last_val = val;
1416 count = 0;
1417 }
1418 count += whole;
1419 }
1420 }
1421 else
1422 {
1423 rem = frac;
1424 for (i = 1; i < src_width; i++)
1425 {
1426 ET_SrcPixel val = src[i];
1427
1428 if (count >= 0xffff - whole || val != last_val)
1429 {
1430 dst[rll_idx].length = count;
1431 dst[rll_idx].value = last_val;
1432 rll_idx++;
1433 last_val = val;
1434 count = 0;
1435 }
1436 count += whole;
1437 rem += frac;
1438 if (rem >= src_width)
1439 {
1440 count++;
1441 rem -= src_width;
1442 }
1443 }
1444 }
1445 dst[rll_idx].length = count;
1446 dst[rll_idx].value = last_val;
1447 rll_idx++;
1448 return rll_idx;
1449 }
1450
1451 /**
1452 * even_better_line: Screen a line using Even TonenFS screeing.
1453 * @ctx: An #EBPlaneCtx context.
1454 * @dest: Array of destination buffers, 8 bpp pixels each.
1455 * @src: Array of source buffer, ET_SrcPixel pixels each.
1456 *
1457 * Screens a single line using Even ToneFS screening.
1458 **/
1459 void
even_better_line(EvenBetterCtx * ebc,uchar ** dest,const ET_SrcPixel * const * src)1460 even_better_line (EvenBetterCtx *ebc, uchar **dest,
1461 const ET_SrcPixel *const *src)
1462 {
1463 ET_Rll *rll_buf[M];
1464 int i;
1465 int source_width = ebc->source_width;
1466 int dest_width = ebc->dest_width;
1467
1468 #ifdef USE_AVEC
1469 if (ebc->using_vectors == 2)
1470 {
1471 even_better_line_fastprep (ebc, dest, src);
1472 }
1473 else
1474 #endif
1475 {
1476 for (i = 0; i < ebc->n_planes; i++)
1477 {
1478 rll_buf[i] = (ET_Rll *)malloc (source_width * sizeof(ET_Rll));
1479 even_better_compress_rll (rll_buf[i], src[i], source_width, dest_width);
1480 }
1481 even_better_line_rll (ebc, dest, (const ET_Rll * const *)rll_buf);
1482 for (i = 0; i < ebc->n_planes; i++)
1483 free (rll_buf[i]);
1484 }
1485 }
1486
1487 /**
1488 * even_better_plane_free: Free an #EBPlaneCtx context.
1489 * @ctx: The #EBPlaneCtx context to free.
1490 *
1491 * Frees @ctx.
1492 **/
1493 static void
even_better_plane_free(EBPlaneCtx * ctx)1494 even_better_plane_free (EBPlaneCtx *ctx)
1495 {
1496 free (ctx->rb_line);
1497 free (ctx->iir_line);
1498 free (ctx->r_line);
1499 free (ctx->a_line);
1500 free (ctx->b_line);
1501 free (ctx->lut);
1502 free (ctx->rb_lut);
1503 free (ctx->rs_lut);
1504 free (ctx->white_count_line);
1505 free (ctx);
1506 }
1507
1508 static int
even_log2(int x)1509 even_log2 (int x)
1510 {
1511 int y = 0;
1512 int z;
1513
1514 for (z = x; z > 1; z = z >> 1)
1515 y++;
1516 return y;
1517 }
1518
1519 /**
1520 * even_better_new: Create new Even ToneFS screening context.
1521 * @source_width: Width of source buffer.
1522 * @dest_width: Width of destination buffer, in pixels.
1523 * @lut: Lookup table for gray values.
1524 *
1525 * Creates a new context for Even ToneFS screening.
1526 *
1527 * If @dest_width is larger than @source_width, then input lines will
1528 * be expanded using nearest-neighbor sampling.
1529 *
1530 * @lut should be an array of 256 values, one for each possible input
1531 * gray value. @lut is a lookup table for gray values. Each value
1532 * ranges from 0 (black) to 2^24 (white).
1533 *
1534 * Return value: The new #EBPlaneCtx context.
1535 **/
1536 static EBPlaneCtx *
even_better_plane_new(const EvenBetterParams * params,EvenBetterCtx * ebc,int plane_idx)1537 even_better_plane_new (const EvenBetterParams *params, EvenBetterCtx *ebc,
1538 int plane_idx)
1539 {
1540 int source_width = params->source_width;
1541 int dest_width = params->dest_width;
1542 int *lut = params->luts[plane_idx];
1543 EBPlaneCtx *result;
1544 int i;
1545 int *new_lut;
1546 int *rb_lut;
1547 char *rs_lut;
1548 double rbscale = eb_compute_rbscale(params);
1549 int even_c1 = ebc->even_c1;
1550 int even_rlimit = 1 << (30 - EVEN_SHIFT + even_c1);
1551 int do_shadows = params->do_shadows;
1552 int log2_levels;
1553 int rs_base;
1554
1555 result = (EBPlaneCtx *)malloc (sizeof(EBPlaneCtx));
1556
1557 result->source_width = source_width;
1558 result->dest_width = dest_width;
1559
1560 new_lut = (int *)malloc ((ET_SRC_MAX + 1) * sizeof(int));
1561 for (i = 0; i < ET_SRC_MAX + 1; i++)
1562 {
1563 int nli;
1564
1565 if (lut == NULL)
1566 {
1567 #if ET_SRC_MAX == 255
1568 nli = (i * 65793 + (i >> 7)) >> (24 - EVEN_SHIFT);
1569 #else
1570 nli = (i * ((double) (1 << EVEN_SHIFT)) / ET_SRC_MAX) + 0.5;
1571 #endif
1572 }
1573 else
1574 nli = lut[i] >> (24 - EVEN_SHIFT);
1575 new_lut[i] = (1 << EVEN_SHIFT) - nli;
1576 }
1577
1578 rb_lut = (int *)malloc ((ET_SRC_MAX + 1) * sizeof(int));
1579 rs_lut = (char *)malloc ((ET_SRC_MAX + 1) * sizeof(int));
1580
1581 log2_levels = even_log2 (params->levels);
1582 rs_base = 35 - EVEN_SHIFT + log2_levels - params->rand_scale;
1583
1584 for (i = 0; i <= ET_SRC_MAX; i++)
1585 {
1586 double rb;
1587 int nl = new_lut[i] * (params->levels - 1);
1588 int rs;
1589
1590 if (nl == 0)
1591 rb = 0;
1592 else
1593 {
1594 rb = (rbscale * (1 << (2 * EVEN_SHIFT - even_c1))) / nl;
1595 if (rb > even_rlimit << (EVEN_SHIFT - even_c1))
1596 rb = even_rlimit << (EVEN_SHIFT - even_c1);
1597 }
1598
1599 rs = eb_compute_randshift(nl, rs_base, do_shadows, params->levels);
1600 rs_lut[i] = rs;
1601
1602 if (params->do_shadows)
1603 {
1604 nl = ((1 << EVEN_SHIFT) - new_lut[i]) * (params->levels - 1);
1605
1606 if (nl == 0)
1607 rb = 0;
1608 else
1609 {
1610 int rb_sh;
1611 rb_sh = (rbscale * (1 << (2 * EVEN_SHIFT - even_c1))) / nl;
1612 if (rb_sh > even_rlimit << (EVEN_SHIFT - even_c1))
1613 rb_sh = even_rlimit << (EVEN_SHIFT - even_c1);
1614 rb -= rb_sh;
1615 }
1616 }
1617 rb_lut[i] = rb;
1618
1619 }
1620
1621 result->lut = new_lut;
1622 result->rb_lut = rb_lut;
1623 result->rs_lut = rs_lut;
1624
1625 result->rb_line = (int *)calloc (dest_width, sizeof(int));
1626 result->iir_line = (int *)calloc (dest_width, sizeof(int));
1627 result->r_line = (int *)calloc (dest_width, sizeof(int));
1628 result->a_line = (int *)calloc (dest_width, sizeof(int));
1629 result->b_line = (int *)calloc (dest_width, sizeof(int));
1630 result->white_count_line = (int *)calloc ((dest_width + 15) >> 4, sizeof(int));
1631 if (do_shadows)
1632 {
1633 result->r_line_sh = (int *)calloc (dest_width, sizeof(int));
1634 result->a_line_sh = (int *)calloc (dest_width, sizeof(int));
1635 result->b_line_sh = (int *)calloc (dest_width, sizeof(int));
1636 }
1637 else
1638 {
1639 result->r_line_sh = NULL;
1640 result->a_line_sh = NULL;
1641 result->b_line_sh = NULL;
1642 }
1643 for (i = 0; i < dest_width; i++)
1644 {
1645 result->a_line[i] = 1;
1646 result->b_line[i] = 1;
1647 result->iir_line[i] = -((rand () & 0x7fff) << 6) >> (24 - EVEN_SHIFT);
1648 if (do_shadows)
1649 {
1650 result->a_line_sh[i] = 1;
1651 result->b_line_sh[i] = 1;
1652 }
1653 }
1654
1655 return result;
1656 }
1657
1658 EvenBetterCtx *
even_better_new(const EvenBetterParams * params)1659 even_better_new (const EvenBetterParams *params)
1660 {
1661 EvenBetterCtx *result = (EvenBetterCtx *)malloc (sizeof(EvenBetterCtx));
1662 int n_planes = params->n_planes;
1663 int i;
1664 int log2_levels, log2_aspect;
1665 int using_vectors = 0;
1666
1667 if (params->dump_file)
1668 {
1669 int header[5];
1670
1671 header[0] = 0x70644245;
1672 header[1] = 'M' * 0x1010000 + 'I' * 0x101;
1673 header[2] = EVENBETTER_VERSION;
1674 header[3] = ET_SRC_MAX;
1675 header[4] = sizeof(ET_SrcPixel);
1676 fwrite (header, sizeof(int), sizeof(header) / sizeof(header[0]),
1677 params->dump_file);
1678 if (params->dump_level >= EB_DUMP_PARAMS)
1679 {
1680
1681 fwrite (params, 1, sizeof(EvenBetterParams), params->dump_file);
1682 }
1683 if (params->dump_level >= EB_DUMP_LUTS)
1684 {
1685 int i;
1686 for (i = 0; i < params->n_planes; i++)
1687 fwrite (params->luts[i], sizeof(int), ET_SRC_MAX + 1,
1688 params->dump_file);
1689 }
1690 }
1691
1692 result->source_width = params->source_width;
1693 result->dest_width = params->dest_width;
1694 result->n_planes = n_planes;
1695 result->levels = params->levels;
1696
1697 result->aspect = params->aspect;
1698
1699 result->even_ehi = 0.6 * (1 << EVEN_SHIFT) / (params->levels - 1);
1700 result->even_elo = -result->even_ehi;
1701
1702 result->strengths = (int *)malloc (sizeof(int) * n_planes);
1703 memcpy (result->strengths, params->strengths,
1704 sizeof(int) * n_planes);
1705
1706 log2_levels = even_log2 (params->levels);
1707 log2_aspect = even_log2 (params->aspect);
1708 result->even_c1 = 6 + log2_aspect + log2_levels - params->even_c1_scale;
1709 result->do_shadows = params->do_shadows;
1710
1711 result->c_line = (int *)calloc (params->dest_width, sizeof(int));
1712
1713 result->seed1 = 0x5324879f;
1714 result->seed2 = 0xb78d0945;
1715
1716 result->dump_file = params->dump_file;
1717 result->dump_level = params->dump_level;
1718
1719 #ifdef USE_SSE2
1720 using_vectors = eb_test_sse2();
1721 #endif
1722 #ifdef USE_AVEC
1723 using_vectors = 1; /* todo: Altivec sensing */
1724
1725 /* select fastprep */
1726 if (sizeof(ET_SrcPixel) == 1 && using_vectors && params->gamma != 0)
1727 using_vectors = 2;
1728
1729 #endif
1730
1731 #ifdef USE_VECTOR
1732 result->using_vectors = using_vectors;
1733 #endif
1734 if (using_vectors)
1735 {
1736 #ifdef USE_SSE2
1737 result->sse2_ctx = (eb_ctx_sse2 **)malloc(sizeof(eb_ctx_sse2 *) *
1738 ((n_planes + 3) >> 2));
1739 for (i = 0; i < n_planes; i += 4)
1740 {
1741 int end_plane = i + 4 < n_planes ? i + 4 : n_planes;
1742 result->sse2_ctx[i >> 2] = eb_ctx_sse2_new(params, i, end_plane);
1743 }
1744 #endif
1745 #ifdef USE_AVEC
1746 result->avec_ctx = (eb_ctx_avec **)malloc(sizeof(eb_ctx_avec *) *
1747 ((n_planes + 3) >> 2));
1748 for (i = 0; i < n_planes; i += 4)
1749 {
1750 int end_plane = i + 4 < n_planes ? i + 4 : n_planes;
1751 result->avec_ctx[i >> 2] = eb_ctx_avec_new(params, i, end_plane);
1752 }
1753 #endif
1754 result->plane_ctx = NULL;
1755 }
1756 else
1757 {
1758 result->plane_ctx = (EBPlaneCtx **)malloc(sizeof(EBPlaneCtx *) * n_planes);
1759 for (i = 0; i < n_planes; i++)
1760 result->plane_ctx[i] = even_better_plane_new (params, result, i);
1761 }
1762 return result;
1763 }
1764
1765 /**
1766 * even_better_free: Free an #EvenBetterCtx context.
1767 * @ctx: The #EvenBetterCtx context to free.
1768 *
1769 * Frees @ctx.
1770 **/
1771 void
even_better_free(EvenBetterCtx * ctx)1772 even_better_free (EvenBetterCtx *ctx)
1773 {
1774 int i;
1775 int n_planes = ctx->n_planes;
1776
1777 if (ctx->dump_file)
1778 fclose (ctx->dump_file);
1779
1780 #ifdef USE_VECTOR
1781 if (ctx->using_vectors)
1782 {
1783 #ifdef USE_SSE2
1784 for (i = 0; i < n_planes; i += 4)
1785 eb_ctx_sse2_free(ctx->sse2_ctx[i >> 2]);
1786 free(ctx->sse2_ctx);
1787 #endif
1788 #ifdef USE_AVEC
1789 for (i = 0; i < n_planes; i += 4)
1790 eb_ctx_avec_free(ctx->avec_ctx[i >> 2]);
1791 free(ctx->avec_ctx);
1792 #endif
1793 }
1794 else
1795 #endif
1796 {
1797 for (i = 0; i < n_planes; i++)
1798 even_better_plane_free (ctx->plane_ctx[i]);
1799 free(ctx->plane_ctx);
1800 }
1801 free (ctx->strengths);
1802 free (ctx->c_line);
1803
1804 free (ctx);
1805 }
1806