1 /* pam.h - pam (portable alpha map) utility library
2 **
3 ** Colormap routines.
4 **
5 ** Copyright (C) 1989, 1991 by Jef Poskanzer.
6 ** Copyright (C) 1997 by Greg Roelofs.
7 **
8 ** Permission to use, copy, modify, and distribute this software and its
9 ** documentation for any purpose and without fee is hereby granted, provided
10 ** that the above copyright notice appear in all copies and that both that
11 ** copyright notice and this permission notice appear in supporting
12 ** documentation. This software is provided "as is" without express or
13 ** implied warranty.
14 */
15
16 #ifndef PAM_H
17 #define PAM_H
18
19 // accidental debug assertions make color search much slower,
20 // so force assertions off if there's no explicit setting
21 #if !defined(NDEBUG) && !defined(DEBUG)
22 #define NDEBUG
23 #endif
24
25 #include <math.h>
26 #include <assert.h>
27 #include <stdlib.h>
28 #include <stdbool.h>
29
30 #ifndef MAX
31 # define MAX(a,b) ((a) > (b)? (a) : (b))
32 # define MIN(a,b) ((a) < (b)? (a) : (b))
33 #endif
34
35 #define MAX_DIFF 1e20
36
37 #ifndef USE_SSE
38 # if defined(__SSE__) && (defined(__amd64__) || defined(__X86_64__) || defined(_WIN64) || defined(WIN32) || defined(__WIN32__))
39 # define USE_SSE 1
40 # else
41 # define USE_SSE 0
42 # endif
43 #endif
44
45 #if USE_SSE
46 # include <xmmintrin.h>
47 # ifdef _MSC_VER
48 # include <intrin.h>
49 # define SSE_ALIGN
50 # else
51 # define SSE_ALIGN __attribute__ ((aligned (16)))
52 # if defined(__i386__) && defined(__PIC__)
53 # define cpuid(func,ax,bx,cx,dx)\
54 __asm__ __volatile__ ( \
55 "push %%ebx\n" \
56 "cpuid\n" \
57 "mov %%ebx, %1\n" \
58 "pop %%ebx\n" \
59 : "=a" (ax), "=r" (bx), "=c" (cx), "=d" (dx) \
60 : "a" (func));
61 # else
62 # define cpuid(func,ax,bx,cx,dx)\
63 __asm__ __volatile__ ("cpuid":\
64 "=a" (ax), "=b" (bx), "=c" (cx), "=d" (dx) : "a" (func));
65 # endif
66 #endif
67 #else
68 # define SSE_ALIGN
69 #endif
70
71 #ifndef _MSC_VER
72 #define LIQ_ARRAY(type, var, count) type var[count]
73 #else
74 #define LIQ_ARRAY(type, var, count) type* var = (type*)_alloca(sizeof(type)*(count))
75 #endif
76
77 #if defined(__GNUC__) || defined (__llvm__)
78 #define ALWAYS_INLINE __attribute__((always_inline)) inline
79 #define NEVER_INLINE __attribute__ ((noinline))
80 #elif defined(_MSC_VER)
81 #define inline __inline
82 #define restrict __restrict
83 #define ALWAYS_INLINE __forceinline
84 #define NEVER_INLINE __declspec(noinline)
85 #else
86 #define ALWAYS_INLINE inline
87 #define NEVER_INLINE
88 #endif
89
90 /* from pam.h */
91
92 typedef struct {
93 unsigned char r, g, b, a;
94 } rgba_pixel;
95
96 typedef struct {
97 float a, r, g, b;
98 } SSE_ALIGN f_pixel;
99
100 static const float internal_gamma = 0.5499f;
101
102 LIQ_PRIVATE void to_f_set_gamma(float gamma_lut[], const double gamma);
103
104 /**
105 Converts 8-bit color to internal gamma and premultiplied alpha.
106 (premultiplied color space is much better for blending of semitransparent colors)
107 */
108 ALWAYS_INLINE static f_pixel rgba_to_f(const float gamma_lut[], const rgba_pixel px);
rgba_to_f(const float gamma_lut[],const rgba_pixel px)109 inline static f_pixel rgba_to_f(const float gamma_lut[], const rgba_pixel px)
110 {
111 float a = px.a/255.f;
112
113 return (f_pixel) {
114 .a = a,
115 .r = gamma_lut[px.r]*a,
116 .g = gamma_lut[px.g]*a,
117 .b = gamma_lut[px.b]*a,
118 };
119 }
120
f_to_rgb(const float gamma,const f_pixel px)121 inline static rgba_pixel f_to_rgb(const float gamma, const f_pixel px)
122 {
123 if (px.a < 1.f/256.f) {
124 return (rgba_pixel){0,0,0,0};
125 }
126
127 float r = px.r / px.a,
128 g = px.g / px.a,
129 b = px.b / px.a,
130 a = px.a;
131
132 r = powf(r, gamma/internal_gamma);
133 g = powf(g, gamma/internal_gamma);
134 b = powf(b, gamma/internal_gamma);
135
136 // 256, because numbers are in range 1..255.9999… rounded down
137 r *= 256.f;
138 g *= 256.f;
139 b *= 256.f;
140 a *= 256.f;
141
142 return (rgba_pixel){
143 .r = r>=255.f ? 255 : r,
144 .g = g>=255.f ? 255 : g,
145 .b = b>=255.f ? 255 : b,
146 .a = a>=255.f ? 255 : a,
147 };
148 }
149
150 ALWAYS_INLINE static double colordifference_ch(const double x, const double y, const double alphas);
colordifference_ch(const double x,const double y,const double alphas)151 inline static double colordifference_ch(const double x, const double y, const double alphas)
152 {
153 // maximum of channel blended on white, and blended on black
154 // premultiplied alpha and backgrounds 0/1 shorten the formula
155 const double black = x-y, white = black+alphas;
156 return MAX(black*black, white*white);
157 }
158
159 ALWAYS_INLINE static float colordifference_stdc(const f_pixel px, const f_pixel py);
colordifference_stdc(const f_pixel px,const f_pixel py)160 inline static float colordifference_stdc(const f_pixel px, const f_pixel py)
161 {
162 // px_b.rgb = px.rgb + 0*(1-px.a) // blend px on black
163 // px_b.a = px.a + 1*(1-px.a)
164 // px_w.rgb = px.rgb + 1*(1-px.a) // blend px on white
165 // px_w.a = px.a + 1*(1-px.a)
166
167 // px_b.rgb = px.rgb // difference same as in opaque RGB
168 // px_b.a = 1
169 // px_w.rgb = px.rgb - px.a // difference simplifies to formula below
170 // px_w.a = 1
171
172 // (px.rgb - px.a) - (py.rgb - py.a)
173 // (px.rgb - py.rgb) + (py.a - px.a)
174
175 const double alphas = py.a-px.a;
176 return colordifference_ch(px.r, py.r, alphas) +
177 colordifference_ch(px.g, py.g, alphas) +
178 colordifference_ch(px.b, py.b, alphas);
179 }
180
181 ALWAYS_INLINE static float colordifference(f_pixel px, f_pixel py);
colordifference(f_pixel px,f_pixel py)182 inline static float colordifference(f_pixel px, f_pixel py)
183 {
184 #if USE_SSE
185 #ifdef _MSC_VER
186 /* In MSVC we cannot use the align attribute in parameters.
187 * This is used a lot, so we just use an unaligned load.
188 * Also the compiler incorrectly inlines vpx and vpy without
189 * the volatile when optimization is applied for x86_64. */
190 const volatile __m128 vpx = _mm_loadu_ps((const float*)&px);
191 const volatile __m128 vpy = _mm_loadu_ps((const float*)&py);
192 #else
193 const __m128 vpx = _mm_load_ps((const float*)&px);
194 const __m128 vpy = _mm_load_ps((const float*)&py);
195 #endif
196
197 // y.a - x.a
198 __m128 alphas = _mm_sub_ss(vpy, vpx);
199 alphas = _mm_shuffle_ps(alphas,alphas,0); // copy first to all four
200
201 __m128 onblack = _mm_sub_ps(vpx, vpy); // x - y
202 __m128 onwhite = _mm_add_ps(onblack, alphas); // x - y + (y.a - x.a)
203
204 onblack = _mm_mul_ps(onblack, onblack);
205 onwhite = _mm_mul_ps(onwhite, onwhite);
206 const __m128 max = _mm_max_ps(onwhite, onblack);
207
208 // add rgb, not a
209 const __m128 maxhl = _mm_movehl_ps(max, max);
210 const __m128 tmp = _mm_add_ps(max, maxhl);
211 const __m128 sum = _mm_add_ss(maxhl, _mm_shuffle_ps(tmp, tmp, 1));
212
213 const float res = _mm_cvtss_f32(sum);
214 assert(fabs(res - colordifference_stdc(px,py)) < 0.001);
215 return res;
216 #else
217 return colordifference_stdc(px,py);
218 #endif
219 }
220
221 /* from pamcmap.h */
222 union rgba_as_int {
223 rgba_pixel rgba;
224 unsigned int l;
225 };
226
227 typedef struct {
228 f_pixel acolor;
229 float adjusted_weight, // perceptual weight changed to tweak how mediancut selects colors
230 perceptual_weight; // number of pixels weighted by importance of different areas of the picture
231
232 float color_weight; // these two change every time histogram subset is sorted
233 union {
234 unsigned int sort_value;
235 unsigned char likely_colormap_index;
236 } tmp;
237 } hist_item;
238
239 typedef struct {
240 hist_item *achv;
241 void (*free)(void*);
242 double total_perceptual_weight;
243 unsigned int size;
244 unsigned int ignorebits;
245 } histogram;
246
247 typedef struct {
248 f_pixel acolor;
249 float popularity;
250 bool fixed; // if true it's user-supplied and must not be changed (e.g in K-Means iteration)
251 } colormap_item;
252
253 typedef struct colormap {
254 unsigned int colors;
255 void* (*malloc)(size_t);
256 void (*free)(void*);
257 colormap_item palette[];
258 } colormap;
259
260 struct acolorhist_arr_item {
261 union rgba_as_int color;
262 unsigned int perceptual_weight;
263 };
264
265 struct acolorhist_arr_head {
266 struct acolorhist_arr_item inline1, inline2;
267 unsigned int used, capacity;
268 struct acolorhist_arr_item *other_items;
269 };
270
271 struct acolorhash_table {
272 struct mempool *mempool;
273 unsigned int ignorebits, maxcolors, colors, cols, rows;
274 unsigned int hash_size;
275 unsigned int freestackp;
276 struct acolorhist_arr_item *freestack[512];
277 struct acolorhist_arr_head buckets[];
278 };
279
280 LIQ_PRIVATE void pam_freeacolorhash(struct acolorhash_table *acht);
281 LIQ_PRIVATE struct acolorhash_table *pam_allocacolorhash(unsigned int maxcolors, unsigned int surface, unsigned int ignorebits, void* (*malloc)(size_t), void (*free)(void*));
282 LIQ_PRIVATE histogram *pam_acolorhashtoacolorhist(const struct acolorhash_table *acht, const double gamma, void* (*malloc)(size_t), void (*free)(void*));
283 LIQ_PRIVATE bool pam_computeacolorhash(struct acolorhash_table *acht, const rgba_pixel *const pixels[], unsigned int cols, unsigned int rows, const unsigned char *importance_map);
284 LIQ_PRIVATE bool pam_add_to_hash(struct acolorhash_table *acht, unsigned int hash, unsigned int boost, union rgba_as_int px, unsigned int row, unsigned int rows);
285
286 LIQ_PRIVATE void pam_freeacolorhist(histogram *h);
287
288 LIQ_PRIVATE colormap *pam_colormap(unsigned int colors, void* (*malloc)(size_t), void (*free)(void*));
289 LIQ_PRIVATE colormap *pam_duplicate_colormap(colormap *map);
290 LIQ_PRIVATE void pam_freecolormap(colormap *c);
291
292 #endif
293