1 /*
2 * Copyright 2018 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #include "skcms.h"
9 #include "skcms_internal.h"
10 #include <assert.h>
11 #include <float.h>
12 #include <limits.h>
13 #include <stdlib.h>
14 #include <string.h>
15
16 #if defined(__ARM_NEON)
17 #include <arm_neon.h>
18 #elif defined(__SSE__)
19 #include <immintrin.h>
20
21 #if defined(__clang__)
22 // That #include <immintrin.h> is usually enough, but Clang's headers
23 // "helpfully" skip including the whole kitchen sink when _MSC_VER is
24 // defined, because lots of programs on Windows would include that and
25 // it'd be a lot slower. But we want all those headers included so we
26 // can use their features after runtime checks later.
27 #include <smmintrin.h>
28 #include <avxintrin.h>
29 #include <avx2intrin.h>
30 #include <avx512fintrin.h>
31 #include <avx512dqintrin.h>
32 #endif
33 #endif
34
35 // sizeof(x) will return size_t, which is 32-bit on some machines and 64-bit on others.
36 // We have better testing on 64-bit machines, so force 32-bit machines to behave like 64-bit.
37 //
38 // Please do not use sizeof() directly, and size_t only when required.
39 // (We have no way of enforcing these requests...)
40 #define SAFE_SIZEOF(x) ((uint64_t)sizeof(x))
41
42 // Same sort of thing for _Layout structs with a variable sized array at the end (named "variable").
43 #define SAFE_FIXED_SIZE(type) ((uint64_t)offsetof(type, variable))
44
45 static const union {
46 uint32_t bits;
47 float f;
48 } inf_ = { 0x7f800000 };
49 #define INFINITY_ inf_.f
50
51 #if defined(__clang__) || defined(__GNUC__)
52 #define small_memcpy __builtin_memcpy
53 #else
54 #define small_memcpy memcpy
55 #endif
56
log2f_(float x)57 static float log2f_(float x) {
58 // The first approximation of log2(x) is its exponent 'e', minus 127.
59 int32_t bits;
60 small_memcpy(&bits, &x, sizeof(bits));
61
62 float e = (float)bits * (1.0f / (1<<23));
63
64 // If we use the mantissa too we can refine the error signficantly.
65 int32_t m_bits = (bits & 0x007fffff) | 0x3f000000;
66 float m;
67 small_memcpy(&m, &m_bits, sizeof(m));
68
69 return (e - 124.225514990f
70 - 1.498030302f*m
71 - 1.725879990f/(0.3520887068f + m));
72 }
logf_(float x)73 static float logf_(float x) {
74 const float ln2 = 0.69314718f;
75 return ln2*log2f_(x);
76 }
77
exp2f_(float x)78 static float exp2f_(float x) {
79 float fract = x - floorf_(x);
80
81 float fbits = (1.0f * (1<<23)) * (x + 121.274057500f
82 - 1.490129070f*fract
83 + 27.728023300f/(4.84252568f - fract));
84
85 // Before we cast fbits to int32_t, check for out of range values to pacify UBSAN.
86 // INT_MAX is not exactly representable as a float, so exclude it as effectively infinite.
87 // INT_MIN is a power of 2 and exactly representable as a float, so it's fine.
88 if (fbits >= (float)INT_MAX) {
89 return INFINITY_;
90 } else if (fbits < (float)INT_MIN) {
91 return -INFINITY_;
92 }
93
94 int32_t bits = (int32_t)fbits;
95 small_memcpy(&x, &bits, sizeof(x));
96 return x;
97 }
98
99 // Not static, as it's used by some test tools.
powf_(float x,float y)100 float powf_(float x, float y) {
101 assert (x >= 0);
102 return (x == 0) || (x == 1) ? x
103 : exp2f_(log2f_(x) * y);
104 }
105
expf_(float x)106 static float expf_(float x) {
107 const float log2_e = 1.4426950408889634074f;
108 return exp2f_(log2_e * x);
109 }
110
fmaxf_(float x,float y)111 static float fmaxf_(float x, float y) { return x > y ? x : y; }
fminf_(float x,float y)112 static float fminf_(float x, float y) { return x < y ? x : y; }
113
isfinitef_(float x)114 static bool isfinitef_(float x) { return 0 == x*0; }
115
minus_1_ulp(float x)116 static float minus_1_ulp(float x) {
117 int32_t bits;
118 memcpy(&bits, &x, sizeof(bits));
119 bits = bits - 1;
120 memcpy(&x, &bits, sizeof(bits));
121 return x;
122 }
123
124 // Most transfer functions we work with are sRGBish.
125 // For exotic HDR transfer functions, we encode them using a tf.g that makes no sense,
126 // and repurpose the other fields to hold the parameters of the HDR functions.
127 enum TFKind { Bad, sRGBish, PQish, HLGish, HLGinvish };
128 struct TF_PQish { float A,B,C,D,E,F; };
129 struct TF_HLGish { float R,G,a,b,c; };
130
TFKind_marker(TFKind kind)131 static float TFKind_marker(TFKind kind) {
132 // We'd use different NaNs, but those aren't guaranteed to be preserved by WASM.
133 return -(float)kind;
134 }
135
classify(const skcms_TransferFunction & tf,TF_PQish * pq=nullptr,TF_HLGish * hlg=nullptr)136 static TFKind classify(const skcms_TransferFunction& tf, TF_PQish* pq = nullptr
137 , TF_HLGish* hlg = nullptr) {
138 if (tf.g < 0 && (int)tf.g == tf.g) {
139 // TODO: sanity checks for PQ/HLG like we do for sRGBish.
140 switch ((int)tf.g) {
141 case -PQish: if (pq ) { memcpy(pq , &tf.a, sizeof(*pq )); } return PQish;
142 case -HLGish: if (hlg) { memcpy(hlg, &tf.a, sizeof(*hlg)); } return HLGish;
143 case -HLGinvish: if (hlg) { memcpy(hlg, &tf.a, sizeof(*hlg)); } return HLGinvish;
144 }
145 return Bad;
146 }
147
148 // Basic sanity checks for sRGBish transfer functions.
149 if (isfinitef_(tf.a + tf.b + tf.c + tf.d + tf.e + tf.f + tf.g)
150 // a,c,d,g should be non-negative to make any sense.
151 && tf.a >= 0
152 && tf.c >= 0
153 && tf.d >= 0
154 && tf.g >= 0
155 // Raising a negative value to a fractional tf->g produces complex numbers.
156 && tf.a * tf.d + tf.b >= 0) {
157 return sRGBish;
158 }
159
160 return Bad;
161 }
162
skcms_TransferFunction_makePQish(skcms_TransferFunction * tf,float A,float B,float C,float D,float E,float F)163 bool skcms_TransferFunction_makePQish(skcms_TransferFunction* tf,
164 float A, float B, float C,
165 float D, float E, float F) {
166 *tf = { TFKind_marker(PQish), A,B,C,D,E,F };
167 assert(classify(*tf) == PQish);
168 return true;
169 }
170
skcms_TransferFunction_makeHLGish(skcms_TransferFunction * tf,float R,float G,float a,float b,float c)171 bool skcms_TransferFunction_makeHLGish(skcms_TransferFunction* tf,
172 float R, float G,
173 float a, float b, float c) {
174 *tf = { TFKind_marker(HLGish), R,G, a,b,c, 0 };
175 assert(classify(*tf) == HLGish);
176 return true;
177 }
178
skcms_TransferFunction_eval(const skcms_TransferFunction * tf,float x)179 float skcms_TransferFunction_eval(const skcms_TransferFunction* tf, float x) {
180 float sign = x < 0 ? -1.0f : 1.0f;
181 x *= sign;
182
183 TF_PQish pq;
184 TF_HLGish hlg;
185 switch (classify(*tf, &pq, &hlg)) {
186 case Bad: break;
187
188 case HLGish: return sign * (x*hlg.R <= 1 ? powf_(x*hlg.R, hlg.G)
189 : expf_((x-hlg.c)*hlg.a) + hlg.b);
190
191 // skcms_TransferFunction_invert() inverts R, G, and a for HLGinvish so this math is fast.
192 case HLGinvish: return sign * (x <= 1 ? hlg.R * powf_(x, hlg.G)
193 : hlg.a * logf_(x - hlg.b) + hlg.c);
194
195
196 case sRGBish: return sign * (x < tf->d ? tf->c * x + tf->f
197 : powf_(tf->a * x + tf->b, tf->g) + tf->e);
198
199 case PQish: return sign * powf_(fmaxf_(pq.A + pq.B * powf_(x, pq.C), 0)
200 / (pq.D + pq.E * powf_(x, pq.C)),
201 pq.F);
202 }
203 return 0;
204 }
205
206
eval_curve(const skcms_Curve * curve,float x)207 static float eval_curve(const skcms_Curve* curve, float x) {
208 if (curve->table_entries == 0) {
209 return skcms_TransferFunction_eval(&curve->parametric, x);
210 }
211
212 float ix = fmaxf_(0, fminf_(x, 1)) * (curve->table_entries - 1);
213 int lo = (int) ix ,
214 hi = (int)(float)minus_1_ulp(ix + 1.0f);
215 float t = ix - (float)lo;
216
217 float l, h;
218 if (curve->table_8) {
219 l = curve->table_8[lo] * (1/255.0f);
220 h = curve->table_8[hi] * (1/255.0f);
221 } else {
222 uint16_t be_l, be_h;
223 memcpy(&be_l, curve->table_16 + 2*lo, 2);
224 memcpy(&be_h, curve->table_16 + 2*hi, 2);
225 #if __BIG_ENDIAN__
226 l = be_l * (1/65535.0f);
227 h = be_h * (1/65535.0f);
228 #else
229 uint16_t le_l = ((be_l << 8) | (be_l >> 8)) & 0xffff;
230 uint16_t le_h = ((be_h << 8) | (be_h >> 8)) & 0xffff;
231 l = le_l * (1/65535.0f);
232 h = le_h * (1/65535.0f);
233 #endif
234 }
235 return l + (h-l)*t;
236 }
237
skcms_MaxRoundtripError(const skcms_Curve * curve,const skcms_TransferFunction * inv_tf)238 float skcms_MaxRoundtripError(const skcms_Curve* curve, const skcms_TransferFunction* inv_tf) {
239 uint32_t N = curve->table_entries > 256 ? curve->table_entries : 256;
240 const float dx = 1.0f / (N - 1);
241 float err = 0;
242 for (uint32_t i = 0; i < N; i++) {
243 float x = i * dx,
244 y = eval_curve(curve, x);
245 err = fmaxf_(err, fabsf_(x - skcms_TransferFunction_eval(inv_tf, y)));
246 }
247 return err;
248 }
249
skcms_AreApproximateInverses(const skcms_Curve * curve,const skcms_TransferFunction * inv_tf)250 bool skcms_AreApproximateInverses(const skcms_Curve* curve, const skcms_TransferFunction* inv_tf) {
251 return skcms_MaxRoundtripError(curve, inv_tf) < (1/512.0f);
252 }
253
254 // Additional ICC signature values that are only used internally
255 enum {
256 // File signature
257 skcms_Signature_acsp = 0x61637370,
258
259 // Tag signatures
260 skcms_Signature_rTRC = 0x72545243,
261 skcms_Signature_gTRC = 0x67545243,
262 skcms_Signature_bTRC = 0x62545243,
263 skcms_Signature_kTRC = 0x6B545243,
264
265 skcms_Signature_rXYZ = 0x7258595A,
266 skcms_Signature_gXYZ = 0x6758595A,
267 skcms_Signature_bXYZ = 0x6258595A,
268
269 skcms_Signature_A2B0 = 0x41324230,
270 skcms_Signature_A2B1 = 0x41324231,
271 skcms_Signature_mAB = 0x6D414220,
272
273 skcms_Signature_CHAD = 0x63686164,
274 skcms_Signature_WTPT = 0x77747074,
275
276 // Type signatures
277 skcms_Signature_curv = 0x63757276,
278 skcms_Signature_mft1 = 0x6D667431,
279 skcms_Signature_mft2 = 0x6D667432,
280 skcms_Signature_para = 0x70617261,
281 skcms_Signature_sf32 = 0x73663332,
282 // XYZ is also a PCS signature, so it's defined in skcms.h
283 // skcms_Signature_XYZ = 0x58595A20,
284 };
285
read_big_u16(const uint8_t * ptr)286 static uint16_t read_big_u16(const uint8_t* ptr) {
287 uint16_t be;
288 memcpy(&be, ptr, sizeof(be));
289 #if __BIG_ENDIAN__
290 return be;
291 #else
292 #if defined(_MSC_VER)
293 return _byteswap_ushort(be);
294 #else
295 return __builtin_bswap16(be);
296 #endif
297 #endif
298 }
299
read_big_u32(const uint8_t * ptr)300 static uint32_t read_big_u32(const uint8_t* ptr) {
301 uint32_t be;
302 memcpy(&be, ptr, sizeof(be));
303 #if __BIG_ENDIAN__
304 return be;
305 #else
306 #if defined(_MSC_VER)
307 return _byteswap_ulong(be);
308 #else
309 return __builtin_bswap32(be);
310 #endif
311 #endif
312 }
313
read_big_i32(const uint8_t * ptr)314 static int32_t read_big_i32(const uint8_t* ptr) {
315 return (int32_t)read_big_u32(ptr);
316 }
317
read_big_fixed(const uint8_t * ptr)318 static float read_big_fixed(const uint8_t* ptr) {
319 return read_big_i32(ptr) * (1.0f / 65536.0f);
320 }
321
322 // Maps to an in-memory profile so that fields line up to the locations specified
323 // in ICC.1:2010, section 7.2
324 typedef struct {
325 uint8_t size [ 4];
326 uint8_t cmm_type [ 4];
327 uint8_t version [ 4];
328 uint8_t profile_class [ 4];
329 uint8_t data_color_space [ 4];
330 uint8_t pcs [ 4];
331 uint8_t creation_date_time [12];
332 uint8_t signature [ 4];
333 uint8_t platform [ 4];
334 uint8_t flags [ 4];
335 uint8_t device_manufacturer [ 4];
336 uint8_t device_model [ 4];
337 uint8_t device_attributes [ 8];
338 uint8_t rendering_intent [ 4];
339 uint8_t illuminant_X [ 4];
340 uint8_t illuminant_Y [ 4];
341 uint8_t illuminant_Z [ 4];
342 uint8_t creator [ 4];
343 uint8_t profile_id [16];
344 uint8_t reserved [28];
345 uint8_t tag_count [ 4]; // Technically not part of header, but required
346 } header_Layout;
347
348 typedef struct {
349 uint8_t signature [4];
350 uint8_t offset [4];
351 uint8_t size [4];
352 } tag_Layout;
353
get_tag_table(const skcms_ICCProfile * profile)354 static const tag_Layout* get_tag_table(const skcms_ICCProfile* profile) {
355 return (const tag_Layout*)(profile->buffer + SAFE_SIZEOF(header_Layout));
356 }
357
358 // s15Fixed16ArrayType is technically variable sized, holding N values. However, the only valid
359 // use of the type is for the CHAD tag that stores exactly nine values.
360 typedef struct {
361 uint8_t type [ 4];
362 uint8_t reserved [ 4];
363 uint8_t values [36];
364 } sf32_Layout;
365
skcms_GetCHAD(const skcms_ICCProfile * profile,skcms_Matrix3x3 * m)366 bool skcms_GetCHAD(const skcms_ICCProfile* profile, skcms_Matrix3x3* m) {
367 skcms_ICCTag tag;
368 if (!skcms_GetTagBySignature(profile, skcms_Signature_CHAD, &tag)) {
369 return false;
370 }
371
372 if (tag.type != skcms_Signature_sf32 || tag.size < SAFE_SIZEOF(sf32_Layout)) {
373 return false;
374 }
375
376 const sf32_Layout* sf32Tag = (const sf32_Layout*)tag.buf;
377 const uint8_t* values = sf32Tag->values;
378 for (int r = 0; r < 3; ++r)
379 for (int c = 0; c < 3; ++c, values += 4) {
380 m->vals[r][c] = read_big_fixed(values);
381 }
382 return true;
383 }
384
385 // XYZType is technically variable sized, holding N XYZ triples. However, the only valid uses of
386 // the type are for tags/data that store exactly one triple.
387 typedef struct {
388 uint8_t type [4];
389 uint8_t reserved [4];
390 uint8_t X [4];
391 uint8_t Y [4];
392 uint8_t Z [4];
393 } XYZ_Layout;
394
read_tag_xyz(const skcms_ICCTag * tag,float * x,float * y,float * z)395 static bool read_tag_xyz(const skcms_ICCTag* tag, float* x, float* y, float* z) {
396 if (tag->type != skcms_Signature_XYZ || tag->size < SAFE_SIZEOF(XYZ_Layout)) {
397 return false;
398 }
399
400 const XYZ_Layout* xyzTag = (const XYZ_Layout*)tag->buf;
401
402 *x = read_big_fixed(xyzTag->X);
403 *y = read_big_fixed(xyzTag->Y);
404 *z = read_big_fixed(xyzTag->Z);
405 return true;
406 }
407
skcms_GetWTPT(const skcms_ICCProfile * profile,float xyz[3])408 bool skcms_GetWTPT(const skcms_ICCProfile* profile, float xyz[3]) {
409 skcms_ICCTag tag;
410 return skcms_GetTagBySignature(profile, skcms_Signature_WTPT, &tag) &&
411 read_tag_xyz(&tag, &xyz[0], &xyz[1], &xyz[2]);
412 }
413
read_to_XYZD50(const skcms_ICCTag * rXYZ,const skcms_ICCTag * gXYZ,const skcms_ICCTag * bXYZ,skcms_Matrix3x3 * toXYZ)414 static bool read_to_XYZD50(const skcms_ICCTag* rXYZ, const skcms_ICCTag* gXYZ,
415 const skcms_ICCTag* bXYZ, skcms_Matrix3x3* toXYZ) {
416 return read_tag_xyz(rXYZ, &toXYZ->vals[0][0], &toXYZ->vals[1][0], &toXYZ->vals[2][0]) &&
417 read_tag_xyz(gXYZ, &toXYZ->vals[0][1], &toXYZ->vals[1][1], &toXYZ->vals[2][1]) &&
418 read_tag_xyz(bXYZ, &toXYZ->vals[0][2], &toXYZ->vals[1][2], &toXYZ->vals[2][2]);
419 }
420
421 typedef struct {
422 uint8_t type [4];
423 uint8_t reserved_a [4];
424 uint8_t function_type [2];
425 uint8_t reserved_b [2];
426 uint8_t variable [1/*variable*/]; // 1, 3, 4, 5, or 7 s15.16, depending on function_type
427 } para_Layout;
428
read_curve_para(const uint8_t * buf,uint32_t size,skcms_Curve * curve,uint32_t * curve_size)429 static bool read_curve_para(const uint8_t* buf, uint32_t size,
430 skcms_Curve* curve, uint32_t* curve_size) {
431 if (size < SAFE_FIXED_SIZE(para_Layout)) {
432 return false;
433 }
434
435 const para_Layout* paraTag = (const para_Layout*)buf;
436
437 enum { kG = 0, kGAB = 1, kGABC = 2, kGABCD = 3, kGABCDEF = 4 };
438 uint16_t function_type = read_big_u16(paraTag->function_type);
439 if (function_type > kGABCDEF) {
440 return false;
441 }
442
443 static const uint32_t curve_bytes[] = { 4, 12, 16, 20, 28 };
444 if (size < SAFE_FIXED_SIZE(para_Layout) + curve_bytes[function_type]) {
445 return false;
446 }
447
448 if (curve_size) {
449 *curve_size = SAFE_FIXED_SIZE(para_Layout) + curve_bytes[function_type];
450 }
451
452 curve->table_entries = 0;
453 curve->parametric.a = 1.0f;
454 curve->parametric.b = 0.0f;
455 curve->parametric.c = 0.0f;
456 curve->parametric.d = 0.0f;
457 curve->parametric.e = 0.0f;
458 curve->parametric.f = 0.0f;
459 curve->parametric.g = read_big_fixed(paraTag->variable);
460
461 switch (function_type) {
462 case kGAB:
463 curve->parametric.a = read_big_fixed(paraTag->variable + 4);
464 curve->parametric.b = read_big_fixed(paraTag->variable + 8);
465 if (curve->parametric.a == 0) {
466 return false;
467 }
468 curve->parametric.d = -curve->parametric.b / curve->parametric.a;
469 break;
470 case kGABC:
471 curve->parametric.a = read_big_fixed(paraTag->variable + 4);
472 curve->parametric.b = read_big_fixed(paraTag->variable + 8);
473 curve->parametric.e = read_big_fixed(paraTag->variable + 12);
474 if (curve->parametric.a == 0) {
475 return false;
476 }
477 curve->parametric.d = -curve->parametric.b / curve->parametric.a;
478 curve->parametric.f = curve->parametric.e;
479 break;
480 case kGABCD:
481 curve->parametric.a = read_big_fixed(paraTag->variable + 4);
482 curve->parametric.b = read_big_fixed(paraTag->variable + 8);
483 curve->parametric.c = read_big_fixed(paraTag->variable + 12);
484 curve->parametric.d = read_big_fixed(paraTag->variable + 16);
485 break;
486 case kGABCDEF:
487 curve->parametric.a = read_big_fixed(paraTag->variable + 4);
488 curve->parametric.b = read_big_fixed(paraTag->variable + 8);
489 curve->parametric.c = read_big_fixed(paraTag->variable + 12);
490 curve->parametric.d = read_big_fixed(paraTag->variable + 16);
491 curve->parametric.e = read_big_fixed(paraTag->variable + 20);
492 curve->parametric.f = read_big_fixed(paraTag->variable + 24);
493 break;
494 }
495 return classify(curve->parametric) == sRGBish;
496 }
497
498 typedef struct {
499 uint8_t type [4];
500 uint8_t reserved [4];
501 uint8_t value_count [4];
502 uint8_t variable [1/*variable*/]; // value_count, 8.8 if 1, uint16 (n*65535) if > 1
503 } curv_Layout;
504
read_curve_curv(const uint8_t * buf,uint32_t size,skcms_Curve * curve,uint32_t * curve_size)505 static bool read_curve_curv(const uint8_t* buf, uint32_t size,
506 skcms_Curve* curve, uint32_t* curve_size) {
507 if (size < SAFE_FIXED_SIZE(curv_Layout)) {
508 return false;
509 }
510
511 const curv_Layout* curvTag = (const curv_Layout*)buf;
512
513 uint32_t value_count = read_big_u32(curvTag->value_count);
514 if (size < SAFE_FIXED_SIZE(curv_Layout) + value_count * SAFE_SIZEOF(uint16_t)) {
515 return false;
516 }
517
518 if (curve_size) {
519 *curve_size = SAFE_FIXED_SIZE(curv_Layout) + value_count * SAFE_SIZEOF(uint16_t);
520 }
521
522 if (value_count < 2) {
523 curve->table_entries = 0;
524 curve->parametric.a = 1.0f;
525 curve->parametric.b = 0.0f;
526 curve->parametric.c = 0.0f;
527 curve->parametric.d = 0.0f;
528 curve->parametric.e = 0.0f;
529 curve->parametric.f = 0.0f;
530 if (value_count == 0) {
531 // Empty tables are a shorthand for an identity curve
532 curve->parametric.g = 1.0f;
533 } else {
534 // Single entry tables are a shorthand for simple gamma
535 curve->parametric.g = read_big_u16(curvTag->variable) * (1.0f / 256.0f);
536 }
537 } else {
538 curve->table_8 = nullptr;
539 curve->table_16 = curvTag->variable;
540 curve->table_entries = value_count;
541 }
542
543 return true;
544 }
545
546 // Parses both curveType and parametricCurveType data. Ensures that at most 'size' bytes are read.
547 // If curve_size is not nullptr, writes the number of bytes used by the curve in (*curve_size).
read_curve(const uint8_t * buf,uint32_t size,skcms_Curve * curve,uint32_t * curve_size)548 static bool read_curve(const uint8_t* buf, uint32_t size,
549 skcms_Curve* curve, uint32_t* curve_size) {
550 if (!buf || size < 4 || !curve) {
551 return false;
552 }
553
554 uint32_t type = read_big_u32(buf);
555 if (type == skcms_Signature_para) {
556 return read_curve_para(buf, size, curve, curve_size);
557 } else if (type == skcms_Signature_curv) {
558 return read_curve_curv(buf, size, curve, curve_size);
559 }
560
561 return false;
562 }
563
564 // mft1 and mft2 share a large chunk of data
565 typedef struct {
566 uint8_t type [ 4];
567 uint8_t reserved_a [ 4];
568 uint8_t input_channels [ 1];
569 uint8_t output_channels [ 1];
570 uint8_t grid_points [ 1];
571 uint8_t reserved_b [ 1];
572 uint8_t matrix [36];
573 } mft_CommonLayout;
574
575 typedef struct {
576 mft_CommonLayout common [1];
577
578 uint8_t variable [1/*variable*/];
579 } mft1_Layout;
580
581 typedef struct {
582 mft_CommonLayout common [1];
583
584 uint8_t input_table_entries [2];
585 uint8_t output_table_entries [2];
586 uint8_t variable [1/*variable*/];
587 } mft2_Layout;
588
read_mft_common(const mft_CommonLayout * mftTag,skcms_A2B * a2b)589 static bool read_mft_common(const mft_CommonLayout* mftTag, skcms_A2B* a2b) {
590 // MFT matrices are applied before the first set of curves, but must be identity unless the
591 // input is PCSXYZ. We don't support PCSXYZ profiles, so we ignore this matrix. Note that the
592 // matrix in skcms_A2B is applied later in the pipe, so supporting this would require another
593 // field/flag.
594 a2b->matrix_channels = 0;
595
596 a2b->input_channels = mftTag->input_channels[0];
597 a2b->output_channels = mftTag->output_channels[0];
598
599 // We require exactly three (ie XYZ/Lab/RGB) output channels
600 if (a2b->output_channels != ARRAY_COUNT(a2b->output_curves)) {
601 return false;
602 }
603 // We require at least one, and no more than four (ie CMYK) input channels
604 if (a2b->input_channels < 1 || a2b->input_channels > ARRAY_COUNT(a2b->input_curves)) {
605 return false;
606 }
607
608 for (uint32_t i = 0; i < a2b->input_channels; ++i) {
609 a2b->grid_points[i] = mftTag->grid_points[0];
610 }
611 // The grid only makes sense with at least two points along each axis
612 if (a2b->grid_points[0] < 2) {
613 return false;
614 }
615
616 return true;
617 }
618
init_a2b_tables(const uint8_t * table_base,uint64_t max_tables_len,uint32_t byte_width,uint32_t input_table_entries,uint32_t output_table_entries,skcms_A2B * a2b)619 static bool init_a2b_tables(const uint8_t* table_base, uint64_t max_tables_len, uint32_t byte_width,
620 uint32_t input_table_entries, uint32_t output_table_entries,
621 skcms_A2B* a2b) {
622 // byte_width is 1 or 2, [input|output]_table_entries are in [2, 4096], so no overflow
623 uint32_t byte_len_per_input_table = input_table_entries * byte_width;
624 uint32_t byte_len_per_output_table = output_table_entries * byte_width;
625
626 // [input|output]_channels are <= 4, so still no overflow
627 uint32_t byte_len_all_input_tables = a2b->input_channels * byte_len_per_input_table;
628 uint32_t byte_len_all_output_tables = a2b->output_channels * byte_len_per_output_table;
629
630 uint64_t grid_size = a2b->output_channels * byte_width;
631 for (uint32_t axis = 0; axis < a2b->input_channels; ++axis) {
632 grid_size *= a2b->grid_points[axis];
633 }
634
635 if (max_tables_len < byte_len_all_input_tables + grid_size + byte_len_all_output_tables) {
636 return false;
637 }
638
639 for (uint32_t i = 0; i < a2b->input_channels; ++i) {
640 a2b->input_curves[i].table_entries = input_table_entries;
641 if (byte_width == 1) {
642 a2b->input_curves[i].table_8 = table_base + i * byte_len_per_input_table;
643 a2b->input_curves[i].table_16 = nullptr;
644 } else {
645 a2b->input_curves[i].table_8 = nullptr;
646 a2b->input_curves[i].table_16 = table_base + i * byte_len_per_input_table;
647 }
648 }
649
650 if (byte_width == 1) {
651 a2b->grid_8 = table_base + byte_len_all_input_tables;
652 a2b->grid_16 = nullptr;
653 } else {
654 a2b->grid_8 = nullptr;
655 a2b->grid_16 = table_base + byte_len_all_input_tables;
656 }
657
658 const uint8_t* output_table_base = table_base + byte_len_all_input_tables + grid_size;
659 for (uint32_t i = 0; i < a2b->output_channels; ++i) {
660 a2b->output_curves[i].table_entries = output_table_entries;
661 if (byte_width == 1) {
662 a2b->output_curves[i].table_8 = output_table_base + i * byte_len_per_output_table;
663 a2b->output_curves[i].table_16 = nullptr;
664 } else {
665 a2b->output_curves[i].table_8 = nullptr;
666 a2b->output_curves[i].table_16 = output_table_base + i * byte_len_per_output_table;
667 }
668 }
669
670 return true;
671 }
672
read_tag_mft1(const skcms_ICCTag * tag,skcms_A2B * a2b)673 static bool read_tag_mft1(const skcms_ICCTag* tag, skcms_A2B* a2b) {
674 if (tag->size < SAFE_FIXED_SIZE(mft1_Layout)) {
675 return false;
676 }
677
678 const mft1_Layout* mftTag = (const mft1_Layout*)tag->buf;
679 if (!read_mft_common(mftTag->common, a2b)) {
680 return false;
681 }
682
683 uint32_t input_table_entries = 256;
684 uint32_t output_table_entries = 256;
685
686 return init_a2b_tables(mftTag->variable, tag->size - SAFE_FIXED_SIZE(mft1_Layout), 1,
687 input_table_entries, output_table_entries, a2b);
688 }
689
read_tag_mft2(const skcms_ICCTag * tag,skcms_A2B * a2b)690 static bool read_tag_mft2(const skcms_ICCTag* tag, skcms_A2B* a2b) {
691 if (tag->size < SAFE_FIXED_SIZE(mft2_Layout)) {
692 return false;
693 }
694
695 const mft2_Layout* mftTag = (const mft2_Layout*)tag->buf;
696 if (!read_mft_common(mftTag->common, a2b)) {
697 return false;
698 }
699
700 uint32_t input_table_entries = read_big_u16(mftTag->input_table_entries);
701 uint32_t output_table_entries = read_big_u16(mftTag->output_table_entries);
702
703 // ICC spec mandates that 2 <= table_entries <= 4096
704 if (input_table_entries < 2 || input_table_entries > 4096 ||
705 output_table_entries < 2 || output_table_entries > 4096) {
706 return false;
707 }
708
709 return init_a2b_tables(mftTag->variable, tag->size - SAFE_FIXED_SIZE(mft2_Layout), 2,
710 input_table_entries, output_table_entries, a2b);
711 }
712
read_curves(const uint8_t * buf,uint32_t size,uint32_t curve_offset,uint32_t num_curves,skcms_Curve * curves)713 static bool read_curves(const uint8_t* buf, uint32_t size, uint32_t curve_offset,
714 uint32_t num_curves, skcms_Curve* curves) {
715 for (uint32_t i = 0; i < num_curves; ++i) {
716 if (curve_offset > size) {
717 return false;
718 }
719
720 uint32_t curve_bytes;
721 if (!read_curve(buf + curve_offset, size - curve_offset, &curves[i], &curve_bytes)) {
722 return false;
723 }
724
725 if (curve_bytes > UINT32_MAX - 3) {
726 return false;
727 }
728 curve_bytes = (curve_bytes + 3) & ~3U;
729
730 uint64_t new_offset_64 = (uint64_t)curve_offset + curve_bytes;
731 curve_offset = (uint32_t)new_offset_64;
732 if (new_offset_64 != curve_offset) {
733 return false;
734 }
735 }
736
737 return true;
738 }
739
740 typedef struct {
741 uint8_t type [ 4];
742 uint8_t reserved_a [ 4];
743 uint8_t input_channels [ 1];
744 uint8_t output_channels [ 1];
745 uint8_t reserved_b [ 2];
746 uint8_t b_curve_offset [ 4];
747 uint8_t matrix_offset [ 4];
748 uint8_t m_curve_offset [ 4];
749 uint8_t clut_offset [ 4];
750 uint8_t a_curve_offset [ 4];
751 } mAB_Layout;
752
753 typedef struct {
754 uint8_t grid_points [16];
755 uint8_t grid_byte_width [ 1];
756 uint8_t reserved [ 3];
757 uint8_t variable [1/*variable*/];
758 } mABCLUT_Layout;
759
read_tag_mab(const skcms_ICCTag * tag,skcms_A2B * a2b,bool pcs_is_xyz)760 static bool read_tag_mab(const skcms_ICCTag* tag, skcms_A2B* a2b, bool pcs_is_xyz) {
761 if (tag->size < SAFE_SIZEOF(mAB_Layout)) {
762 return false;
763 }
764
765 const mAB_Layout* mABTag = (const mAB_Layout*)tag->buf;
766
767 a2b->input_channels = mABTag->input_channels[0];
768 a2b->output_channels = mABTag->output_channels[0];
769
770 // We require exactly three (ie XYZ/Lab/RGB) output channels
771 if (a2b->output_channels != ARRAY_COUNT(a2b->output_curves)) {
772 return false;
773 }
774 // We require no more than four (ie CMYK) input channels
775 if (a2b->input_channels > ARRAY_COUNT(a2b->input_curves)) {
776 return false;
777 }
778
779 uint32_t b_curve_offset = read_big_u32(mABTag->b_curve_offset);
780 uint32_t matrix_offset = read_big_u32(mABTag->matrix_offset);
781 uint32_t m_curve_offset = read_big_u32(mABTag->m_curve_offset);
782 uint32_t clut_offset = read_big_u32(mABTag->clut_offset);
783 uint32_t a_curve_offset = read_big_u32(mABTag->a_curve_offset);
784
785 // "B" curves must be present
786 if (0 == b_curve_offset) {
787 return false;
788 }
789
790 if (!read_curves(tag->buf, tag->size, b_curve_offset, a2b->output_channels,
791 a2b->output_curves)) {
792 return false;
793 }
794
795 // "M" curves and Matrix must be used together
796 if (0 != m_curve_offset) {
797 if (0 == matrix_offset) {
798 return false;
799 }
800 a2b->matrix_channels = a2b->output_channels;
801 if (!read_curves(tag->buf, tag->size, m_curve_offset, a2b->matrix_channels,
802 a2b->matrix_curves)) {
803 return false;
804 }
805
806 // Read matrix, which is stored as a row-major 3x3, followed by the fourth column
807 if (tag->size < matrix_offset + 12 * SAFE_SIZEOF(uint32_t)) {
808 return false;
809 }
810 float encoding_factor = pcs_is_xyz ? 65535 / 32768.0f : 1.0f;
811 const uint8_t* mtx_buf = tag->buf + matrix_offset;
812 a2b->matrix.vals[0][0] = encoding_factor * read_big_fixed(mtx_buf + 0);
813 a2b->matrix.vals[0][1] = encoding_factor * read_big_fixed(mtx_buf + 4);
814 a2b->matrix.vals[0][2] = encoding_factor * read_big_fixed(mtx_buf + 8);
815 a2b->matrix.vals[1][0] = encoding_factor * read_big_fixed(mtx_buf + 12);
816 a2b->matrix.vals[1][1] = encoding_factor * read_big_fixed(mtx_buf + 16);
817 a2b->matrix.vals[1][2] = encoding_factor * read_big_fixed(mtx_buf + 20);
818 a2b->matrix.vals[2][0] = encoding_factor * read_big_fixed(mtx_buf + 24);
819 a2b->matrix.vals[2][1] = encoding_factor * read_big_fixed(mtx_buf + 28);
820 a2b->matrix.vals[2][2] = encoding_factor * read_big_fixed(mtx_buf + 32);
821 a2b->matrix.vals[0][3] = encoding_factor * read_big_fixed(mtx_buf + 36);
822 a2b->matrix.vals[1][3] = encoding_factor * read_big_fixed(mtx_buf + 40);
823 a2b->matrix.vals[2][3] = encoding_factor * read_big_fixed(mtx_buf + 44);
824 } else {
825 if (0 != matrix_offset) {
826 return false;
827 }
828 a2b->matrix_channels = 0;
829 }
830
831 // "A" curves and CLUT must be used together
832 if (0 != a_curve_offset) {
833 if (0 == clut_offset) {
834 return false;
835 }
836 if (!read_curves(tag->buf, tag->size, a_curve_offset, a2b->input_channels,
837 a2b->input_curves)) {
838 return false;
839 }
840
841 if (tag->size < clut_offset + SAFE_FIXED_SIZE(mABCLUT_Layout)) {
842 return false;
843 }
844 const mABCLUT_Layout* clut = (const mABCLUT_Layout*)(tag->buf + clut_offset);
845
846 if (clut->grid_byte_width[0] == 1) {
847 a2b->grid_8 = clut->variable;
848 a2b->grid_16 = nullptr;
849 } else if (clut->grid_byte_width[0] == 2) {
850 a2b->grid_8 = nullptr;
851 a2b->grid_16 = clut->variable;
852 } else {
853 return false;
854 }
855
856 uint64_t grid_size = a2b->output_channels * clut->grid_byte_width[0];
857 for (uint32_t i = 0; i < a2b->input_channels; ++i) {
858 a2b->grid_points[i] = clut->grid_points[i];
859 // The grid only makes sense with at least two points along each axis
860 if (a2b->grid_points[i] < 2) {
861 return false;
862 }
863 grid_size *= a2b->grid_points[i];
864 }
865 if (tag->size < clut_offset + SAFE_FIXED_SIZE(mABCLUT_Layout) + grid_size) {
866 return false;
867 }
868 } else {
869 if (0 != clut_offset) {
870 return false;
871 }
872
873 // If there is no CLUT, the number of input and output channels must match
874 if (a2b->input_channels != a2b->output_channels) {
875 return false;
876 }
877
878 // Zero out the number of input channels to signal that we're skipping this stage
879 a2b->input_channels = 0;
880 }
881
882 return true;
883 }
884
885 // If you pass f, we'll fit a possibly-non-zero value for *f.
886 // If you pass nullptr, we'll assume you want *f to be treated as zero.
fit_linear(const skcms_Curve * curve,int N,float tol,float * c,float * d,float * f=nullptr)887 static int fit_linear(const skcms_Curve* curve, int N, float tol,
888 float* c, float* d, float* f = nullptr) {
889 assert(N > 1);
890 // We iteratively fit the first points to the TF's linear piece.
891 // We want the cx + f line to pass through the first and last points we fit exactly.
892 //
893 // As we walk along the points we find the minimum and maximum slope of the line before the
894 // error would exceed our tolerance. We stop when the range [slope_min, slope_max] becomes
895 // emtpy, when we definitely can't add any more points.
896 //
897 // Some points' error intervals may intersect the running interval but not lie fully
898 // within it. So we keep track of the last point we saw that is a valid end point candidate,
899 // and once the search is done, back up to build the line through *that* point.
900 const float dx = 1.0f / (N - 1);
901
902 int lin_points = 1;
903
904 float f_zero = 0.0f;
905 if (f) {
906 *f = eval_curve(curve, 0);
907 } else {
908 f = &f_zero;
909 }
910
911
912 float slope_min = -INFINITY_;
913 float slope_max = +INFINITY_;
914 for (int i = 1; i < N; ++i) {
915 float x = i * dx;
916 float y = eval_curve(curve, x);
917
918 float slope_max_i = (y + tol - *f) / x,
919 slope_min_i = (y - tol - *f) / x;
920 if (slope_max_i < slope_min || slope_max < slope_min_i) {
921 // Slope intervals would no longer overlap.
922 break;
923 }
924 slope_max = fminf_(slope_max, slope_max_i);
925 slope_min = fmaxf_(slope_min, slope_min_i);
926
927 float cur_slope = (y - *f) / x;
928 if (slope_min <= cur_slope && cur_slope <= slope_max) {
929 lin_points = i + 1;
930 *c = cur_slope;
931 }
932 }
933
934 // Set D to the last point that met our tolerance.
935 *d = (lin_points - 1) * dx;
936 return lin_points;
937 }
938
read_a2b(const skcms_ICCTag * tag,skcms_A2B * a2b,bool pcs_is_xyz)939 static bool read_a2b(const skcms_ICCTag* tag, skcms_A2B* a2b, bool pcs_is_xyz) {
940 bool ok = false;
941 if (tag->type == skcms_Signature_mft1) {
942 ok = read_tag_mft1(tag, a2b);
943 } else if (tag->type == skcms_Signature_mft2) {
944 ok = read_tag_mft2(tag, a2b);
945 } else if (tag->type == skcms_Signature_mAB) {
946 ok = read_tag_mab(tag, a2b, pcs_is_xyz);
947 }
948 if (!ok) {
949 return false;
950 }
951
952 // Detect and canonicalize identity tables.
953 skcms_Curve* curves[] = {
954 a2b->input_channels > 0 ? a2b->input_curves + 0 : nullptr,
955 a2b->input_channels > 1 ? a2b->input_curves + 1 : nullptr,
956 a2b->input_channels > 2 ? a2b->input_curves + 2 : nullptr,
957 a2b->input_channels > 3 ? a2b->input_curves + 3 : nullptr,
958 a2b->matrix_channels > 0 ? a2b->matrix_curves + 0 : nullptr,
959 a2b->matrix_channels > 1 ? a2b->matrix_curves + 1 : nullptr,
960 a2b->matrix_channels > 2 ? a2b->matrix_curves + 2 : nullptr,
961 a2b->output_channels > 0 ? a2b->output_curves + 0 : nullptr,
962 a2b->output_channels > 1 ? a2b->output_curves + 1 : nullptr,
963 a2b->output_channels > 2 ? a2b->output_curves + 2 : nullptr,
964 };
965
966 for (int i = 0; i < ARRAY_COUNT(curves); i++) {
967 skcms_Curve* curve = curves[i];
968
969 if (curve && curve->table_entries && curve->table_entries <= (uint32_t)INT_MAX) {
970 int N = (int)curve->table_entries;
971
972 float c = 0.0f, d = 0.0f, f = 0.0f;
973 if (N == fit_linear(curve, N, 1.0f/(2*N), &c,&d,&f)
974 && c == 1.0f
975 && f == 0.0f) {
976 curve->table_entries = 0;
977 curve->table_8 = nullptr;
978 curve->table_16 = nullptr;
979 curve->parametric = skcms_TransferFunction{1,1,0,0,0,0,0};
980 }
981 }
982 }
983
984 return true;
985 }
986
skcms_GetTagByIndex(const skcms_ICCProfile * profile,uint32_t idx,skcms_ICCTag * tag)987 void skcms_GetTagByIndex(const skcms_ICCProfile* profile, uint32_t idx, skcms_ICCTag* tag) {
988 if (!profile || !profile->buffer || !tag) { return; }
989 if (idx > profile->tag_count) { return; }
990 const tag_Layout* tags = get_tag_table(profile);
991 tag->signature = read_big_u32(tags[idx].signature);
992 tag->size = read_big_u32(tags[idx].size);
993 tag->buf = read_big_u32(tags[idx].offset) + profile->buffer;
994 tag->type = read_big_u32(tag->buf);
995 }
996
skcms_GetTagBySignature(const skcms_ICCProfile * profile,uint32_t sig,skcms_ICCTag * tag)997 bool skcms_GetTagBySignature(const skcms_ICCProfile* profile, uint32_t sig, skcms_ICCTag* tag) {
998 if (!profile || !profile->buffer || !tag) { return false; }
999 const tag_Layout* tags = get_tag_table(profile);
1000 for (uint32_t i = 0; i < profile->tag_count; ++i) {
1001 if (read_big_u32(tags[i].signature) == sig) {
1002 tag->signature = sig;
1003 tag->size = read_big_u32(tags[i].size);
1004 tag->buf = read_big_u32(tags[i].offset) + profile->buffer;
1005 tag->type = read_big_u32(tag->buf);
1006 return true;
1007 }
1008 }
1009 return false;
1010 }
1011
usable_as_src(const skcms_ICCProfile * profile)1012 static bool usable_as_src(const skcms_ICCProfile* profile) {
1013 return profile->has_A2B
1014 || (profile->has_trc && profile->has_toXYZD50);
1015 }
1016
skcms_Parse(const void * buf,size_t len,skcms_ICCProfile * profile)1017 bool skcms_Parse(const void* buf, size_t len, skcms_ICCProfile* profile) {
1018 assert(SAFE_SIZEOF(header_Layout) == 132);
1019
1020 if (!profile) {
1021 return false;
1022 }
1023 memset(profile, 0, SAFE_SIZEOF(*profile));
1024
1025 if (len < SAFE_SIZEOF(header_Layout)) {
1026 return false;
1027 }
1028
1029 // Byte-swap all header fields
1030 const header_Layout* header = (const header_Layout*)buf;
1031 profile->buffer = (const uint8_t*)buf;
1032 profile->size = read_big_u32(header->size);
1033 uint32_t version = read_big_u32(header->version);
1034 profile->data_color_space = read_big_u32(header->data_color_space);
1035 profile->pcs = read_big_u32(header->pcs);
1036 uint32_t signature = read_big_u32(header->signature);
1037 float illuminant_X = read_big_fixed(header->illuminant_X);
1038 float illuminant_Y = read_big_fixed(header->illuminant_Y);
1039 float illuminant_Z = read_big_fixed(header->illuminant_Z);
1040 profile->tag_count = read_big_u32(header->tag_count);
1041
1042 // Validate signature, size (smaller than buffer, large enough to hold tag table),
1043 // and major version
1044 uint64_t tag_table_size = profile->tag_count * SAFE_SIZEOF(tag_Layout);
1045 if (signature != skcms_Signature_acsp ||
1046 profile->size > len ||
1047 profile->size < SAFE_SIZEOF(header_Layout) + tag_table_size ||
1048 (version >> 24) > 4) {
1049 return false;
1050 }
1051
1052 // Validate that illuminant is D50 white
1053 if (fabsf_(illuminant_X - 0.9642f) > 0.0100f ||
1054 fabsf_(illuminant_Y - 1.0000f) > 0.0100f ||
1055 fabsf_(illuminant_Z - 0.8249f) > 0.0100f) {
1056 return false;
1057 }
1058
1059 // Validate that all tag entries have sane offset + size
1060 const tag_Layout* tags = get_tag_table(profile);
1061 for (uint32_t i = 0; i < profile->tag_count; ++i) {
1062 uint32_t tag_offset = read_big_u32(tags[i].offset);
1063 uint32_t tag_size = read_big_u32(tags[i].size);
1064 uint64_t tag_end = (uint64_t)tag_offset + (uint64_t)tag_size;
1065 if (tag_size < 4 || tag_end > profile->size) {
1066 return false;
1067 }
1068 }
1069
1070 if (profile->pcs != skcms_Signature_XYZ && profile->pcs != skcms_Signature_Lab) {
1071 return false;
1072 }
1073
1074 bool pcs_is_xyz = profile->pcs == skcms_Signature_XYZ;
1075
1076 // Pre-parse commonly used tags.
1077 skcms_ICCTag kTRC;
1078 if (profile->data_color_space == skcms_Signature_Gray &&
1079 skcms_GetTagBySignature(profile, skcms_Signature_kTRC, &kTRC)) {
1080 if (!read_curve(kTRC.buf, kTRC.size, &profile->trc[0], nullptr)) {
1081 // Malformed tag
1082 return false;
1083 }
1084 profile->trc[1] = profile->trc[0];
1085 profile->trc[2] = profile->trc[0];
1086 profile->has_trc = true;
1087
1088 if (pcs_is_xyz) {
1089 profile->toXYZD50.vals[0][0] = illuminant_X;
1090 profile->toXYZD50.vals[1][1] = illuminant_Y;
1091 profile->toXYZD50.vals[2][2] = illuminant_Z;
1092 profile->has_toXYZD50 = true;
1093 }
1094 } else {
1095 skcms_ICCTag rTRC, gTRC, bTRC;
1096 if (skcms_GetTagBySignature(profile, skcms_Signature_rTRC, &rTRC) &&
1097 skcms_GetTagBySignature(profile, skcms_Signature_gTRC, &gTRC) &&
1098 skcms_GetTagBySignature(profile, skcms_Signature_bTRC, &bTRC)) {
1099 if (!read_curve(rTRC.buf, rTRC.size, &profile->trc[0], nullptr) ||
1100 !read_curve(gTRC.buf, gTRC.size, &profile->trc[1], nullptr) ||
1101 !read_curve(bTRC.buf, bTRC.size, &profile->trc[2], nullptr)) {
1102 // Malformed TRC tags
1103 return false;
1104 }
1105 profile->has_trc = true;
1106 }
1107
1108 skcms_ICCTag rXYZ, gXYZ, bXYZ;
1109 if (skcms_GetTagBySignature(profile, skcms_Signature_rXYZ, &rXYZ) &&
1110 skcms_GetTagBySignature(profile, skcms_Signature_gXYZ, &gXYZ) &&
1111 skcms_GetTagBySignature(profile, skcms_Signature_bXYZ, &bXYZ)) {
1112 if (!read_to_XYZD50(&rXYZ, &gXYZ, &bXYZ, &profile->toXYZD50)) {
1113 // Malformed XYZ tags
1114 return false;
1115 }
1116 profile->has_toXYZD50 = true;
1117 }
1118 }
1119
1120 skcms_ICCTag a2b_tag;
1121
1122 // For now, we're preferring A2B0, like Skia does and the ICC spec tells us to.
1123 // TODO: prefer A2B1 (relative colormetric) over A2B0 (perceptual)?
1124 // This breaks with the ICC spec, but we think it's a good idea, given that TRC curves
1125 // and all our known users are thinking exclusively in terms of relative colormetric.
1126 const uint32_t sigs[] = { skcms_Signature_A2B0, skcms_Signature_A2B1 };
1127 for (int i = 0; i < ARRAY_COUNT(sigs); i++) {
1128 if (skcms_GetTagBySignature(profile, sigs[i], &a2b_tag)) {
1129 if (!read_a2b(&a2b_tag, &profile->A2B, pcs_is_xyz)) {
1130 // Malformed A2B tag
1131 return false;
1132 }
1133 profile->has_A2B = true;
1134 break;
1135 }
1136 }
1137
1138 return usable_as_src(profile);
1139 }
1140
1141
skcms_sRGB_profile()1142 const skcms_ICCProfile* skcms_sRGB_profile() {
1143 static const skcms_ICCProfile sRGB_profile = {
1144 nullptr, // buffer, moot here
1145
1146 0, // size, moot here
1147 skcms_Signature_RGB, // data_color_space
1148 skcms_Signature_XYZ, // pcs
1149 0, // tag count, moot here
1150
1151 // We choose to represent sRGB with its canonical transfer function,
1152 // and with its canonical XYZD50 gamut matrix.
1153 true, // has_trc, followed by the 3 trc curves
1154 {
1155 {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
1156 {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
1157 {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
1158 },
1159
1160 true, // has_toXYZD50, followed by 3x3 toXYZD50 matrix
1161 {{
1162 { 0.436065674f, 0.385147095f, 0.143066406f },
1163 { 0.222488403f, 0.716873169f, 0.060607910f },
1164 { 0.013916016f, 0.097076416f, 0.714096069f },
1165 }},
1166
1167 false, // has_A2B, followed by a2b itself which we don't care about.
1168 {
1169 0,
1170 {
1171 {{0, {0,0, 0,0,0,0,0}}},
1172 {{0, {0,0, 0,0,0,0,0}}},
1173 {{0, {0,0, 0,0,0,0,0}}},
1174 {{0, {0,0, 0,0,0,0,0}}},
1175 },
1176 {0,0,0,0},
1177 nullptr,
1178 nullptr,
1179
1180 0,
1181 {
1182 {{0, {0,0, 0,0,0,0,0}}},
1183 {{0, {0,0, 0,0,0,0,0}}},
1184 {{0, {0,0, 0,0,0,0,0}}},
1185 },
1186 {{
1187 { 0,0,0,0 },
1188 { 0,0,0,0 },
1189 { 0,0,0,0 },
1190 }},
1191
1192 0,
1193 {
1194 {{0, {0,0, 0,0,0,0,0}}},
1195 {{0, {0,0, 0,0,0,0,0}}},
1196 {{0, {0,0, 0,0,0,0,0}}},
1197 },
1198 },
1199 };
1200 return &sRGB_profile;
1201 }
1202
skcms_XYZD50_profile()1203 const skcms_ICCProfile* skcms_XYZD50_profile() {
1204 // Just like sRGB above, but with identity transfer functions and toXYZD50 matrix.
1205 static const skcms_ICCProfile XYZD50_profile = {
1206 nullptr, // buffer, moot here
1207
1208 0, // size, moot here
1209 skcms_Signature_RGB, // data_color_space
1210 skcms_Signature_XYZ, // pcs
1211 0, // tag count, moot here
1212
1213 true, // has_trc, followed by the 3 trc curves
1214 {
1215 {{0, {1,1, 0,0,0,0,0}}},
1216 {{0, {1,1, 0,0,0,0,0}}},
1217 {{0, {1,1, 0,0,0,0,0}}},
1218 },
1219
1220 true, // has_toXYZD50, followed by 3x3 toXYZD50 matrix
1221 {{
1222 { 1,0,0 },
1223 { 0,1,0 },
1224 { 0,0,1 },
1225 }},
1226
1227 false, // has_A2B, followed by a2b itself which we don't care about.
1228 {
1229 0,
1230 {
1231 {{0, {0,0, 0,0,0,0,0}}},
1232 {{0, {0,0, 0,0,0,0,0}}},
1233 {{0, {0,0, 0,0,0,0,0}}},
1234 {{0, {0,0, 0,0,0,0,0}}},
1235 },
1236 {0,0,0,0},
1237 nullptr,
1238 nullptr,
1239
1240 0,
1241 {
1242 {{0, {0,0, 0,0,0,0,0}}},
1243 {{0, {0,0, 0,0,0,0,0}}},
1244 {{0, {0,0, 0,0,0,0,0}}},
1245 },
1246 {{
1247 { 0,0,0,0 },
1248 { 0,0,0,0 },
1249 { 0,0,0,0 },
1250 }},
1251
1252 0,
1253 {
1254 {{0, {0,0, 0,0,0,0,0}}},
1255 {{0, {0,0, 0,0,0,0,0}}},
1256 {{0, {0,0, 0,0,0,0,0}}},
1257 },
1258 },
1259 };
1260
1261 return &XYZD50_profile;
1262 }
1263
skcms_sRGB_TransferFunction()1264 const skcms_TransferFunction* skcms_sRGB_TransferFunction() {
1265 return &skcms_sRGB_profile()->trc[0].parametric;
1266 }
1267
skcms_sRGB_Inverse_TransferFunction()1268 const skcms_TransferFunction* skcms_sRGB_Inverse_TransferFunction() {
1269 static const skcms_TransferFunction sRGB_inv =
1270 {0.416666657f, 1.137283325f, -0.0f, 12.920000076f, 0.003130805f, -0.054969788f, -0.0f};
1271 return &sRGB_inv;
1272 }
1273
skcms_Identity_TransferFunction()1274 const skcms_TransferFunction* skcms_Identity_TransferFunction() {
1275 static const skcms_TransferFunction identity = {1,1,0,0,0,0,0};
1276 return &identity;
1277 }
1278
1279 const uint8_t skcms_252_random_bytes[] = {
1280 8, 179, 128, 204, 253, 38, 134, 184, 68, 102, 32, 138, 99, 39, 169, 215,
1281 119, 26, 3, 223, 95, 239, 52, 132, 114, 74, 81, 234, 97, 116, 244, 205, 30,
1282 154, 173, 12, 51, 159, 122, 153, 61, 226, 236, 178, 229, 55, 181, 220, 191,
1283 194, 160, 126, 168, 82, 131, 18, 180, 245, 163, 22, 246, 69, 235, 252, 57,
1284 108, 14, 6, 152, 240, 255, 171, 242, 20, 227, 177, 238, 96, 85, 16, 211,
1285 70, 200, 149, 155, 146, 127, 145, 100, 151, 109, 19, 165, 208, 195, 164,
1286 137, 254, 182, 248, 64, 201, 45, 209, 5, 147, 207, 210, 113, 162, 83, 225,
1287 9, 31, 15, 231, 115, 37, 58, 53, 24, 49, 197, 56, 120, 172, 48, 21, 214,
1288 129, 111, 11, 50, 187, 196, 34, 60, 103, 71, 144, 47, 203, 77, 80, 232,
1289 140, 222, 250, 206, 166, 247, 139, 249, 221, 72, 106, 27, 199, 117, 54,
1290 219, 135, 118, 40, 79, 41, 251, 46, 93, 212, 92, 233, 148, 28, 121, 63,
1291 123, 158, 105, 59, 29, 42, 143, 23, 0, 107, 176, 87, 104, 183, 156, 193,
1292 189, 90, 188, 65, 190, 17, 198, 7, 186, 161, 1, 124, 78, 125, 170, 133,
1293 174, 218, 67, 157, 75, 101, 89, 217, 62, 33, 141, 228, 25, 35, 91, 230, 4,
1294 2, 13, 73, 86, 167, 237, 84, 243, 44, 185, 66, 130, 110, 150, 142, 216, 88,
1295 112, 36, 224, 136, 202, 76, 94, 98, 175, 213
1296 };
1297
skcms_ApproximatelyEqualProfiles(const skcms_ICCProfile * A,const skcms_ICCProfile * B)1298 bool skcms_ApproximatelyEqualProfiles(const skcms_ICCProfile* A, const skcms_ICCProfile* B) {
1299 // Test for exactly equal profiles first.
1300 if (A == B || 0 == memcmp(A,B, sizeof(skcms_ICCProfile))) {
1301 return true;
1302 }
1303
1304 // For now this is the essentially the same strategy we use in test_only.c
1305 // for our skcms_Transform() smoke tests:
1306 // 1) transform A to XYZD50
1307 // 2) transform B to XYZD50
1308 // 3) return true if they're similar enough
1309 // Our current criterion in 3) is maximum 1 bit error per XYZD50 byte.
1310
1311 // skcms_252_random_bytes are 252 of a random shuffle of all possible bytes.
1312 // 252 is evenly divisible by 3 and 4. Only 192, 10, 241, and 43 are missing.
1313
1314 if (A->data_color_space != B->data_color_space) {
1315 return false;
1316 }
1317
1318 // Interpret as RGB_888 if data color space is RGB or GRAY, RGBA_8888 if CMYK.
1319 // TODO: working with RGBA_8888 either way is probably fastest.
1320 skcms_PixelFormat fmt = skcms_PixelFormat_RGB_888;
1321 size_t npixels = 84;
1322 if (A->data_color_space == skcms_Signature_CMYK) {
1323 fmt = skcms_PixelFormat_RGBA_8888;
1324 npixels = 63;
1325 }
1326
1327 // TODO: if A or B is a known profile (skcms_sRGB_profile, skcms_XYZD50_profile),
1328 // use pre-canned results and skip that skcms_Transform() call?
1329 uint8_t dstA[252],
1330 dstB[252];
1331 if (!skcms_Transform(
1332 skcms_252_random_bytes, fmt, skcms_AlphaFormat_Unpremul, A,
1333 dstA, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, skcms_XYZD50_profile(),
1334 npixels)) {
1335 return false;
1336 }
1337 if (!skcms_Transform(
1338 skcms_252_random_bytes, fmt, skcms_AlphaFormat_Unpremul, B,
1339 dstB, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, skcms_XYZD50_profile(),
1340 npixels)) {
1341 return false;
1342 }
1343
1344 // TODO: make sure this final check has reasonable codegen.
1345 for (size_t i = 0; i < 252; i++) {
1346 if (abs((int)dstA[i] - (int)dstB[i]) > 1) {
1347 return false;
1348 }
1349 }
1350 return true;
1351 }
1352
skcms_TRCs_AreApproximateInverse(const skcms_ICCProfile * profile,const skcms_TransferFunction * inv_tf)1353 bool skcms_TRCs_AreApproximateInverse(const skcms_ICCProfile* profile,
1354 const skcms_TransferFunction* inv_tf) {
1355 if (!profile || !profile->has_trc) {
1356 return false;
1357 }
1358
1359 return skcms_AreApproximateInverses(&profile->trc[0], inv_tf) &&
1360 skcms_AreApproximateInverses(&profile->trc[1], inv_tf) &&
1361 skcms_AreApproximateInverses(&profile->trc[2], inv_tf);
1362 }
1363
is_zero_to_one(float x)1364 static bool is_zero_to_one(float x) {
1365 return 0 <= x && x <= 1;
1366 }
1367
1368 typedef struct { float vals[3]; } skcms_Vector3;
1369
mv_mul(const skcms_Matrix3x3 * m,const skcms_Vector3 * v)1370 static skcms_Vector3 mv_mul(const skcms_Matrix3x3* m, const skcms_Vector3* v) {
1371 skcms_Vector3 dst = {{0,0,0}};
1372 for (int row = 0; row < 3; ++row) {
1373 dst.vals[row] = m->vals[row][0] * v->vals[0]
1374 + m->vals[row][1] * v->vals[1]
1375 + m->vals[row][2] * v->vals[2];
1376 }
1377 return dst;
1378 }
1379
skcms_AdaptToXYZD50(float wx,float wy,skcms_Matrix3x3 * toXYZD50)1380 bool skcms_AdaptToXYZD50(float wx, float wy,
1381 skcms_Matrix3x3* toXYZD50) {
1382 if (!is_zero_to_one(wx) || !is_zero_to_one(wy) ||
1383 !toXYZD50) {
1384 return false;
1385 }
1386
1387 // Assumes that Y is 1.0f.
1388 skcms_Vector3 wXYZ = { { wx / wy, 1, (1 - wx - wy) / wy } };
1389
1390 // Now convert toXYZ matrix to toXYZD50.
1391 skcms_Vector3 wXYZD50 = { { 0.96422f, 1.0f, 0.82521f } };
1392
1393 // Calculate the chromatic adaptation matrix. We will use the Bradford method, thus
1394 // the matrices below. The Bradford method is used by Adobe and is widely considered
1395 // to be the best.
1396 skcms_Matrix3x3 xyz_to_lms = {{
1397 { 0.8951f, 0.2664f, -0.1614f },
1398 { -0.7502f, 1.7135f, 0.0367f },
1399 { 0.0389f, -0.0685f, 1.0296f },
1400 }};
1401 skcms_Matrix3x3 lms_to_xyz = {{
1402 { 0.9869929f, -0.1470543f, 0.1599627f },
1403 { 0.4323053f, 0.5183603f, 0.0492912f },
1404 { -0.0085287f, 0.0400428f, 0.9684867f },
1405 }};
1406
1407 skcms_Vector3 srcCone = mv_mul(&xyz_to_lms, &wXYZ);
1408 skcms_Vector3 dstCone = mv_mul(&xyz_to_lms, &wXYZD50);
1409
1410 *toXYZD50 = {{
1411 { dstCone.vals[0] / srcCone.vals[0], 0, 0 },
1412 { 0, dstCone.vals[1] / srcCone.vals[1], 0 },
1413 { 0, 0, dstCone.vals[2] / srcCone.vals[2] },
1414 }};
1415 *toXYZD50 = skcms_Matrix3x3_concat(toXYZD50, &xyz_to_lms);
1416 *toXYZD50 = skcms_Matrix3x3_concat(&lms_to_xyz, toXYZD50);
1417
1418 return true;
1419 }
1420
skcms_PrimariesToXYZD50(float rx,float ry,float gx,float gy,float bx,float by,float wx,float wy,skcms_Matrix3x3 * toXYZD50)1421 bool skcms_PrimariesToXYZD50(float rx, float ry,
1422 float gx, float gy,
1423 float bx, float by,
1424 float wx, float wy,
1425 skcms_Matrix3x3* toXYZD50) {
1426 if (!is_zero_to_one(rx) || !is_zero_to_one(ry) ||
1427 !is_zero_to_one(gx) || !is_zero_to_one(gy) ||
1428 !is_zero_to_one(bx) || !is_zero_to_one(by) ||
1429 !is_zero_to_one(wx) || !is_zero_to_one(wy) ||
1430 !toXYZD50) {
1431 return false;
1432 }
1433
1434 // First, we need to convert xy values (primaries) to XYZ.
1435 skcms_Matrix3x3 primaries = {{
1436 { rx, gx, bx },
1437 { ry, gy, by },
1438 { 1 - rx - ry, 1 - gx - gy, 1 - bx - by },
1439 }};
1440 skcms_Matrix3x3 primaries_inv;
1441 if (!skcms_Matrix3x3_invert(&primaries, &primaries_inv)) {
1442 return false;
1443 }
1444
1445 // Assumes that Y is 1.0f.
1446 skcms_Vector3 wXYZ = { { wx / wy, 1, (1 - wx - wy) / wy } };
1447 skcms_Vector3 XYZ = mv_mul(&primaries_inv, &wXYZ);
1448
1449 skcms_Matrix3x3 toXYZ = {{
1450 { XYZ.vals[0], 0, 0 },
1451 { 0, XYZ.vals[1], 0 },
1452 { 0, 0, XYZ.vals[2] },
1453 }};
1454 toXYZ = skcms_Matrix3x3_concat(&primaries, &toXYZ);
1455
1456 skcms_Matrix3x3 DXtoD50;
1457 if (!skcms_AdaptToXYZD50(wx, wy, &DXtoD50)) {
1458 return false;
1459 }
1460
1461 *toXYZD50 = skcms_Matrix3x3_concat(&DXtoD50, &toXYZ);
1462 return true;
1463 }
1464
1465
skcms_Matrix3x3_invert(const skcms_Matrix3x3 * src,skcms_Matrix3x3 * dst)1466 bool skcms_Matrix3x3_invert(const skcms_Matrix3x3* src, skcms_Matrix3x3* dst) {
1467 double a00 = src->vals[0][0],
1468 a01 = src->vals[1][0],
1469 a02 = src->vals[2][0],
1470 a10 = src->vals[0][1],
1471 a11 = src->vals[1][1],
1472 a12 = src->vals[2][1],
1473 a20 = src->vals[0][2],
1474 a21 = src->vals[1][2],
1475 a22 = src->vals[2][2];
1476
1477 double b0 = a00*a11 - a01*a10,
1478 b1 = a00*a12 - a02*a10,
1479 b2 = a01*a12 - a02*a11,
1480 b3 = a20,
1481 b4 = a21,
1482 b5 = a22;
1483
1484 double determinant = b0*b5
1485 - b1*b4
1486 + b2*b3;
1487
1488 if (determinant == 0) {
1489 return false;
1490 }
1491
1492 double invdet = 1.0 / determinant;
1493 if (invdet > +FLT_MAX || invdet < -FLT_MAX || !isfinitef_((float)invdet)) {
1494 return false;
1495 }
1496
1497 b0 *= invdet;
1498 b1 *= invdet;
1499 b2 *= invdet;
1500 b3 *= invdet;
1501 b4 *= invdet;
1502 b5 *= invdet;
1503
1504 dst->vals[0][0] = (float)( a11*b5 - a12*b4 );
1505 dst->vals[1][0] = (float)( a02*b4 - a01*b5 );
1506 dst->vals[2][0] = (float)( + b2 );
1507 dst->vals[0][1] = (float)( a12*b3 - a10*b5 );
1508 dst->vals[1][1] = (float)( a00*b5 - a02*b3 );
1509 dst->vals[2][1] = (float)( - b1 );
1510 dst->vals[0][2] = (float)( a10*b4 - a11*b3 );
1511 dst->vals[1][2] = (float)( a01*b3 - a00*b4 );
1512 dst->vals[2][2] = (float)( + b0 );
1513
1514 for (int r = 0; r < 3; ++r)
1515 for (int c = 0; c < 3; ++c) {
1516 if (!isfinitef_(dst->vals[r][c])) {
1517 return false;
1518 }
1519 }
1520 return true;
1521 }
1522
skcms_Matrix3x3_concat(const skcms_Matrix3x3 * A,const skcms_Matrix3x3 * B)1523 skcms_Matrix3x3 skcms_Matrix3x3_concat(const skcms_Matrix3x3* A, const skcms_Matrix3x3* B) {
1524 skcms_Matrix3x3 m = { { { 0,0,0 },{ 0,0,0 },{ 0,0,0 } } };
1525 for (int r = 0; r < 3; r++)
1526 for (int c = 0; c < 3; c++) {
1527 m.vals[r][c] = A->vals[r][0] * B->vals[0][c]
1528 + A->vals[r][1] * B->vals[1][c]
1529 + A->vals[r][2] * B->vals[2][c];
1530 }
1531 return m;
1532 }
1533
1534 #if defined(__clang__)
1535 [[clang::no_sanitize("float-divide-by-zero")]] // Checked for by classify() on the way out.
1536 #endif
skcms_TransferFunction_invert(const skcms_TransferFunction * src,skcms_TransferFunction * dst)1537 bool skcms_TransferFunction_invert(const skcms_TransferFunction* src, skcms_TransferFunction* dst) {
1538 TF_PQish pq;
1539 TF_HLGish hlg;
1540 switch (classify(*src, &pq, &hlg)) {
1541 case Bad: return false;
1542 case sRGBish: break; // handled below
1543
1544 case PQish:
1545 *dst = { TFKind_marker(PQish), -pq.A, pq.D, 1.0f/pq.F
1546 , pq.B, -pq.E, 1.0f/pq.C};
1547 return true;
1548
1549 case HLGish:
1550 *dst = { TFKind_marker(HLGinvish), 1.0f/hlg.R, 1.0f/hlg.G
1551 , 1.0f/hlg.a, hlg.b, hlg.c, 0 };
1552 return true;
1553
1554 case HLGinvish:
1555 *dst = { TFKind_marker(HLGish), 1.0f/hlg.R, 1.0f/hlg.G
1556 , 1.0f/hlg.a, hlg.b, hlg.c, 0 };
1557 return true;
1558 }
1559
1560 assert (classify(*src) == sRGBish);
1561
1562 // We're inverting this function, solving for x in terms of y.
1563 // y = (cx + f) x < d
1564 // (ax + b)^g + e x ≥ d
1565 // The inverse of this function can be expressed in the same piecewise form.
1566 skcms_TransferFunction inv = {0,0,0,0,0,0,0};
1567
1568 // We'll start by finding the new threshold inv.d.
1569 // In principle we should be able to find that by solving for y at x=d from either side.
1570 // (If those two d values aren't the same, it's a discontinuous transfer function.)
1571 float d_l = src->c * src->d + src->f,
1572 d_r = powf_(src->a * src->d + src->b, src->g) + src->e;
1573 if (fabsf_(d_l - d_r) > 1/512.0f) {
1574 return false;
1575 }
1576 inv.d = d_l; // TODO(mtklein): better in practice to choose d_r?
1577
1578 // When d=0, the linear section collapses to a point. We leave c,d,f all zero in that case.
1579 if (inv.d > 0) {
1580 // Inverting the linear section is pretty straightfoward:
1581 // y = cx + f
1582 // y - f = cx
1583 // (1/c)y - f/c = x
1584 inv.c = 1.0f/src->c;
1585 inv.f = -src->f/src->c;
1586 }
1587
1588 // The interesting part is inverting the nonlinear section:
1589 // y = (ax + b)^g + e.
1590 // y - e = (ax + b)^g
1591 // (y - e)^1/g = ax + b
1592 // (y - e)^1/g - b = ax
1593 // (1/a)(y - e)^1/g - b/a = x
1594 //
1595 // To make that fit our form, we need to move the (1/a) term inside the exponentiation:
1596 // let k = (1/a)^g
1597 // (1/a)( y - e)^1/g - b/a = x
1598 // (ky - ke)^1/g - b/a = x
1599
1600 float k = powf_(src->a, -src->g); // (1/a)^g == a^-g
1601 inv.g = 1.0f / src->g;
1602 inv.a = k;
1603 inv.b = -k * src->e;
1604 inv.e = -src->b / src->a;
1605
1606 // We need to enforce the same constraints here that we do when fitting a curve,
1607 // a >= 0 and ad+b >= 0. These constraints are checked by classify(), so they're true
1608 // of the source function if we're here.
1609
1610 // Just like when fitting the curve, there's really no way to rescue a < 0.
1611 if (inv.a < 0) {
1612 return false;
1613 }
1614 // On the other hand we can rescue an ad+b that's gone slightly negative here.
1615 if (inv.a * inv.d + inv.b < 0) {
1616 inv.b = -inv.a * inv.d;
1617 }
1618
1619 // That should usually make classify(inv) == sRGBish true, but there are a couple situations
1620 // where we might still fail here, like non-finite parameter values.
1621 if (classify(inv) != sRGBish) {
1622 return false;
1623 }
1624
1625 assert (inv.a >= 0);
1626 assert (inv.a * inv.d + inv.b >= 0);
1627
1628 // Now in principle we're done.
1629 // But to preserve the valuable invariant inv(src(1.0f)) == 1.0f, we'll tweak
1630 // e or f of the inverse, depending on which segment contains src(1.0f).
1631 float s = skcms_TransferFunction_eval(src, 1.0f);
1632 if (!isfinitef_(s)) {
1633 return false;
1634 }
1635
1636 float sign = s < 0 ? -1.0f : 1.0f;
1637 s *= sign;
1638 if (s < inv.d) {
1639 inv.f = 1.0f - sign * inv.c * s;
1640 } else {
1641 inv.e = 1.0f - sign * powf_(inv.a * s + inv.b, inv.g);
1642 }
1643
1644 *dst = inv;
1645 return classify(*dst) == sRGBish;
1646 }
1647
1648 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ //
1649
1650 // From here below we're approximating an skcms_Curve with an skcms_TransferFunction{g,a,b,c,d,e,f}:
1651 //
1652 // tf(x) = cx + f x < d
1653 // tf(x) = (ax + b)^g + e x ≥ d
1654 //
1655 // When fitting, we add the additional constraint that both pieces meet at d:
1656 //
1657 // cd + f = (ad + b)^g + e
1658 //
1659 // Solving for e and folding it through gives an alternate formulation of the non-linear piece:
1660 //
1661 // tf(x) = cx + f x < d
1662 // tf(x) = (ax + b)^g - (ad + b)^g + cd + f x ≥ d
1663 //
1664 // Our overall strategy is then:
1665 // For a couple tolerances,
1666 // - fit_linear(): fit c,d,f iteratively to as many points as our tolerance allows
1667 // - invert c,d,f
1668 // - fit_nonlinear(): fit g,a,b using Gauss-Newton given those inverted c,d,f
1669 // (and by constraint, inverted e) to the inverse of the table.
1670 // Return the parameters with least maximum error.
1671 //
1672 // To run Gauss-Newton to find g,a,b, we'll also need the gradient of the residuals
1673 // of round-trip f_inv(x), the inverse of the non-linear piece of f(x).
1674 //
1675 // let y = Table(x)
1676 // r(x) = x - f_inv(y)
1677 //
1678 // ∂r/∂g = ln(ay + b)*(ay + b)^g
1679 // - ln(ad + b)*(ad + b)^g
1680 // ∂r/∂a = yg(ay + b)^(g-1)
1681 // - dg(ad + b)^(g-1)
1682 // ∂r/∂b = g(ay + b)^(g-1)
1683 // - g(ad + b)^(g-1)
1684
1685 // Return the residual of roundtripping skcms_Curve(x) through f_inv(y) with parameters P,
1686 // and fill out the gradient of the residual into dfdP.
rg_nonlinear(float x,const skcms_Curve * curve,const skcms_TransferFunction * tf,float dfdP[3])1687 static float rg_nonlinear(float x,
1688 const skcms_Curve* curve,
1689 const skcms_TransferFunction* tf,
1690 float dfdP[3]) {
1691 const float y = eval_curve(curve, x);
1692
1693 const float g = tf->g, a = tf->a, b = tf->b,
1694 c = tf->c, d = tf->d, f = tf->f;
1695
1696 const float Y = fmaxf_(a*y + b, 0.0f),
1697 D = a*d + b;
1698 assert (D >= 0);
1699
1700 // The gradient.
1701 dfdP[0] = logf_(Y)*powf_(Y, g)
1702 - logf_(D)*powf_(D, g);
1703 dfdP[1] = y*g*powf_(Y, g-1)
1704 - d*g*powf_(D, g-1);
1705 dfdP[2] = g*powf_(Y, g-1)
1706 - g*powf_(D, g-1);
1707
1708 // The residual.
1709 const float f_inv = powf_(Y, g)
1710 - powf_(D, g)
1711 + c*d + f;
1712 return x - f_inv;
1713 }
1714
gauss_newton_step(const skcms_Curve * curve,skcms_TransferFunction * tf,float x0,float dx,int N)1715 static bool gauss_newton_step(const skcms_Curve* curve,
1716 skcms_TransferFunction* tf,
1717 float x0, float dx, int N) {
1718 // We'll sample x from the range [x0,x1] (both inclusive) N times with even spacing.
1719 //
1720 // Let P = [ tf->g, tf->a, tf->b ] (the three terms that we're adjusting).
1721 //
1722 // We want to do P' = P + (Jf^T Jf)^-1 Jf^T r(P),
1723 // where r(P) is the residual vector
1724 // and Jf is the Jacobian matrix of f(), ∂r/∂P.
1725 //
1726 // Let's review the shape of each of these expressions:
1727 // r(P) is [N x 1], a column vector with one entry per value of x tested
1728 // Jf is [N x 3], a matrix with an entry for each (x,P) pair
1729 // Jf^T is [3 x N], the transpose of Jf
1730 //
1731 // Jf^T Jf is [3 x N] * [N x 3] == [3 x 3], a 3x3 matrix,
1732 // and so is its inverse (Jf^T Jf)^-1
1733 // Jf^T r(P) is [3 x N] * [N x 1] == [3 x 1], a column vector with the same shape as P
1734 //
1735 // Our implementation strategy to get to the final ∆P is
1736 // 1) evaluate Jf^T Jf, call that lhs
1737 // 2) evaluate Jf^T r(P), call that rhs
1738 // 3) invert lhs
1739 // 4) multiply inverse lhs by rhs
1740 //
1741 // This is a friendly implementation strategy because we don't have to have any
1742 // buffers that scale with N, and equally nice don't have to perform any matrix
1743 // operations that are variable size.
1744 //
1745 // Other implementation strategies could trade this off, e.g. evaluating the
1746 // pseudoinverse of Jf ( (Jf^T Jf)^-1 Jf^T ) directly, then multiplying that by
1747 // the residuals. That would probably require implementing singular value
1748 // decomposition, and would create a [3 x N] matrix to be multiplied by the
1749 // [N x 1] residual vector, but on the upside I think that'd eliminate the
1750 // possibility of this gauss_newton_step() function ever failing.
1751
1752 // 0) start off with lhs and rhs safely zeroed.
1753 skcms_Matrix3x3 lhs = {{ {0,0,0}, {0,0,0}, {0,0,0} }};
1754 skcms_Vector3 rhs = { {0,0,0} };
1755
1756 // 1,2) evaluate lhs and evaluate rhs
1757 // We want to evaluate Jf only once, but both lhs and rhs involve Jf^T,
1758 // so we'll have to update lhs and rhs at the same time.
1759 for (int i = 0; i < N; i++) {
1760 float x = x0 + i*dx;
1761
1762 float dfdP[3] = {0,0,0};
1763 float resid = rg_nonlinear(x,curve,tf, dfdP);
1764
1765 for (int r = 0; r < 3; r++) {
1766 for (int c = 0; c < 3; c++) {
1767 lhs.vals[r][c] += dfdP[r] * dfdP[c];
1768 }
1769 rhs.vals[r] += dfdP[r] * resid;
1770 }
1771 }
1772
1773 // If any of the 3 P parameters are unused, this matrix will be singular.
1774 // Detect those cases and fix them up to indentity instead, so we can invert.
1775 for (int k = 0; k < 3; k++) {
1776 if (lhs.vals[0][k]==0 && lhs.vals[1][k]==0 && lhs.vals[2][k]==0 &&
1777 lhs.vals[k][0]==0 && lhs.vals[k][1]==0 && lhs.vals[k][2]==0) {
1778 lhs.vals[k][k] = 1;
1779 }
1780 }
1781
1782 // 3) invert lhs
1783 skcms_Matrix3x3 lhs_inv;
1784 if (!skcms_Matrix3x3_invert(&lhs, &lhs_inv)) {
1785 return false;
1786 }
1787
1788 // 4) multiply inverse lhs by rhs
1789 skcms_Vector3 dP = mv_mul(&lhs_inv, &rhs);
1790 tf->g += dP.vals[0];
1791 tf->a += dP.vals[1];
1792 tf->b += dP.vals[2];
1793 return isfinitef_(tf->g) && isfinitef_(tf->a) && isfinitef_(tf->b);
1794 }
1795
max_roundtrip_error_checked(const skcms_Curve * curve,const skcms_TransferFunction * tf_inv)1796 static float max_roundtrip_error_checked(const skcms_Curve* curve,
1797 const skcms_TransferFunction* tf_inv) {
1798 skcms_TransferFunction tf;
1799 if (!skcms_TransferFunction_invert(tf_inv, &tf) || sRGBish != classify(tf)) {
1800 return INFINITY_;
1801 }
1802
1803 skcms_TransferFunction tf_inv_again;
1804 if (!skcms_TransferFunction_invert(&tf, &tf_inv_again)) {
1805 return INFINITY_;
1806 }
1807
1808 return skcms_MaxRoundtripError(curve, &tf_inv_again);
1809 }
1810
1811 // Fit the points in [L,N) to the non-linear piece of tf, or return false if we can't.
fit_nonlinear(const skcms_Curve * curve,int L,int N,skcms_TransferFunction * tf)1812 static bool fit_nonlinear(const skcms_Curve* curve, int L, int N, skcms_TransferFunction* tf) {
1813 // This enforces a few constraints that are not modeled in gauss_newton_step()'s optimization.
1814 auto fixup_tf = [tf]() {
1815 // a must be non-negative. That ensures the function is monotonically increasing.
1816 // We don't really know how to fix up a if it goes negative.
1817 if (tf->a < 0) {
1818 return false;
1819 }
1820 // ad+b must be non-negative. That ensures we don't end up with complex numbers in powf.
1821 // We feel just barely not uneasy enough to tweak b so ad+b is zero in this case.
1822 if (tf->a * tf->d + tf->b < 0) {
1823 tf->b = -tf->a * tf->d;
1824 }
1825 assert (tf->a >= 0 &&
1826 tf->a * tf->d + tf->b >= 0);
1827
1828 // cd+f must be ~= (ad+b)^g+e. That ensures the function is continuous. We keep e as a free
1829 // parameter so we can guarantee this.
1830 tf->e = tf->c*tf->d + tf->f
1831 - powf_(tf->a*tf->d + tf->b, tf->g);
1832
1833 return true;
1834 };
1835
1836 if (!fixup_tf()) {
1837 return false;
1838 }
1839
1840 // No matter where we start, dx should always represent N even steps from 0 to 1.
1841 const float dx = 1.0f / (N-1);
1842
1843 skcms_TransferFunction best_tf = *tf;
1844 float best_max_error = INFINITY_;
1845
1846 // Need this or several curves get worse... *sigh*
1847 float init_error = max_roundtrip_error_checked(curve, tf);
1848 if (init_error < best_max_error) {
1849 best_max_error = init_error;
1850 best_tf = *tf;
1851 }
1852
1853 // As far as we can tell, 1 Gauss-Newton step won't converge, and 3 steps is no better than 2.
1854 for (int j = 0; j < 8; j++) {
1855 if (!gauss_newton_step(curve, tf, L*dx, dx, N-L) || !fixup_tf()) {
1856 *tf = best_tf;
1857 return isfinitef_(best_max_error);
1858 }
1859
1860 float max_error = max_roundtrip_error_checked(curve, tf);
1861 if (max_error < best_max_error) {
1862 best_max_error = max_error;
1863 best_tf = *tf;
1864 }
1865 }
1866
1867 *tf = best_tf;
1868 return isfinitef_(best_max_error);
1869 }
1870
skcms_ApproximateCurve(const skcms_Curve * curve,skcms_TransferFunction * approx,float * max_error)1871 bool skcms_ApproximateCurve(const skcms_Curve* curve,
1872 skcms_TransferFunction* approx,
1873 float* max_error) {
1874 if (!curve || !approx || !max_error) {
1875 return false;
1876 }
1877
1878 if (curve->table_entries == 0) {
1879 // No point approximating an skcms_TransferFunction with an skcms_TransferFunction!
1880 return false;
1881 }
1882
1883 if (curve->table_entries == 1 || curve->table_entries > (uint32_t)INT_MAX) {
1884 // We need at least two points, and must put some reasonable cap on the maximum number.
1885 return false;
1886 }
1887
1888 int N = (int)curve->table_entries;
1889 const float dx = 1.0f / (N - 1);
1890
1891 *max_error = INFINITY_;
1892 const float kTolerances[] = { 1.5f / 65535.0f, 1.0f / 512.0f };
1893 for (int t = 0; t < ARRAY_COUNT(kTolerances); t++) {
1894 skcms_TransferFunction tf,
1895 tf_inv;
1896
1897 // It's problematic to fit curves with non-zero f, so always force it to zero explicitly.
1898 tf.f = 0.0f;
1899 int L = fit_linear(curve, N, kTolerances[t], &tf.c, &tf.d);
1900
1901 if (L == N) {
1902 // If the entire data set was linear, move the coefficients to the nonlinear portion
1903 // with G == 1. This lets use a canonical representation with d == 0.
1904 tf.g = 1;
1905 tf.a = tf.c;
1906 tf.b = tf.f;
1907 tf.c = tf.d = tf.e = tf.f = 0;
1908 } else if (L == N - 1) {
1909 // Degenerate case with only two points in the nonlinear segment. Solve directly.
1910 tf.g = 1;
1911 tf.a = (eval_curve(curve, (N-1)*dx) -
1912 eval_curve(curve, (N-2)*dx))
1913 / dx;
1914 tf.b = eval_curve(curve, (N-2)*dx)
1915 - tf.a * (N-2)*dx;
1916 tf.e = 0;
1917 } else {
1918 // Start by guessing a gamma-only curve through the midpoint.
1919 int mid = (L + N) / 2;
1920 float mid_x = mid / (N - 1.0f);
1921 float mid_y = eval_curve(curve, mid_x);
1922 tf.g = log2f_(mid_y) / log2f_(mid_x);
1923 tf.a = 1;
1924 tf.b = 0;
1925 tf.e = tf.c*tf.d + tf.f
1926 - powf_(tf.a*tf.d + tf.b, tf.g);
1927
1928
1929 if (!skcms_TransferFunction_invert(&tf, &tf_inv) ||
1930 !fit_nonlinear(curve, L,N, &tf_inv)) {
1931 continue;
1932 }
1933
1934 // We fit tf_inv, so calculate tf to keep in sync.
1935 // fit_nonlinear() should guarantee invertibility.
1936 if (!skcms_TransferFunction_invert(&tf_inv, &tf)) {
1937 assert(false);
1938 continue;
1939 }
1940 }
1941
1942 // We'd better have a sane, sRGB-ish TF by now.
1943 // Other non-Bad TFs would be fine, but we know we've only ever tried to fit sRGBish;
1944 // anything else is just some accident of math and the way we pun tf.g as a type flag.
1945 // fit_nonlinear() should guarantee this, but the special cases may fail this test.
1946 if (sRGBish != classify(tf)) {
1947 continue;
1948 }
1949
1950 // We find our error by roundtripping the table through tf_inv.
1951 //
1952 // (The most likely use case for this approximation is to be inverted and
1953 // used as the transfer function for a destination color space.)
1954 //
1955 // We've kept tf and tf_inv in sync above, but we can't guarantee that tf is
1956 // invertible, so re-verify that here (and use the new inverse for testing).
1957 // fit_nonlinear() should guarantee this, but the special cases that don't use
1958 // it may fail this test.
1959 if (!skcms_TransferFunction_invert(&tf, &tf_inv)) {
1960 continue;
1961 }
1962
1963 float err = skcms_MaxRoundtripError(curve, &tf_inv);
1964 if (*max_error > err) {
1965 *max_error = err;
1966 *approx = tf;
1967 }
1968 }
1969 return isfinitef_(*max_error);
1970 }
1971
1972 // ~~~~ Impl. of skcms_Transform() ~~~~
1973
1974 typedef enum {
1975 Op_load_a8,
1976 Op_load_g8,
1977 Op_load_8888_palette8,
1978 Op_load_4444,
1979 Op_load_565,
1980 Op_load_888,
1981 Op_load_8888,
1982 Op_load_1010102,
1983 Op_load_161616LE,
1984 Op_load_16161616LE,
1985 Op_load_161616BE,
1986 Op_load_16161616BE,
1987 Op_load_hhh,
1988 Op_load_hhhh,
1989 Op_load_fff,
1990 Op_load_ffff,
1991
1992 Op_swap_rb,
1993 Op_clamp,
1994 Op_invert,
1995 Op_force_opaque,
1996 Op_premul,
1997 Op_unpremul,
1998 Op_matrix_3x3,
1999 Op_matrix_3x4,
2000 Op_lab_to_xyz,
2001
2002 Op_tf_r,
2003 Op_tf_g,
2004 Op_tf_b,
2005 Op_tf_a,
2006
2007 Op_pq_r,
2008 Op_pq_g,
2009 Op_pq_b,
2010 Op_pq_a,
2011
2012 Op_hlg_r,
2013 Op_hlg_g,
2014 Op_hlg_b,
2015 Op_hlg_a,
2016
2017 Op_hlginv_r,
2018 Op_hlginv_g,
2019 Op_hlginv_b,
2020 Op_hlginv_a,
2021
2022 Op_table_r,
2023 Op_table_g,
2024 Op_table_b,
2025 Op_table_a,
2026
2027 Op_clut,
2028
2029 Op_store_a8,
2030 Op_store_g8,
2031 Op_store_4444,
2032 Op_store_565,
2033 Op_store_888,
2034 Op_store_8888,
2035 Op_store_1010102,
2036 Op_store_161616LE,
2037 Op_store_16161616LE,
2038 Op_store_161616BE,
2039 Op_store_16161616BE,
2040 Op_store_hhh,
2041 Op_store_hhhh,
2042 Op_store_fff,
2043 Op_store_ffff,
2044 } Op;
2045
2046 #if defined(__clang__)
2047 template <int N, typename T> using Vec = T __attribute__((ext_vector_type(N)));
2048 #elif defined(__GNUC__)
2049 // For some reason GCC accepts this nonsense, but not the more straightforward version,
2050 // template <int N, typename T> using Vec = T __attribute__((vector_size(N*sizeof(T))));
2051 template <int N, typename T>
2052 struct VecHelper { typedef T __attribute__((vector_size(N*sizeof(T)))) V; };
2053
2054 template <int N, typename T> using Vec = typename VecHelper<N,T>::V;
2055 #endif
2056
2057 // First, instantiate our default exec_ops() implementation using the default compiliation target.
2058
2059 namespace baseline {
2060 #if defined(SKCMS_PORTABLE) || !(defined(__clang__) || defined(__GNUC__)) \
2061 || (defined(__EMSCRIPTEN_major__) && !defined(__wasm_simd128__))
2062 #define N 1
2063 template <typename T> using V = T;
2064 using Color = float;
2065 #elif defined(__AVX512F__)
2066 #define N 16
2067 template <typename T> using V = Vec<N,T>;
2068 using Color = float;
2069 #elif defined(__AVX__)
2070 #define N 8
2071 template <typename T> using V = Vec<N,T>;
2072 using Color = float;
2073 #elif defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(SKCMS_OPT_INTO_NEON_FP16)
2074 #define N 8
2075 template <typename T> using V = Vec<N,T>;
2076 using Color = _Float16;
2077 #else
2078 #define N 4
2079 template <typename T> using V = Vec<N,T>;
2080 using Color = float;
2081 #endif
2082
2083 #include "src/Transform_inl.h"
2084 #undef N
2085 }
2086
2087 // Now, instantiate any other versions of run_program() we may want for runtime detection.
2088 #if !defined(SKCMS_PORTABLE) && \
2089 !defined(SKCMS_NO_RUNTIME_CPU_DETECTION) && \
2090 ((defined(__APPLE__) && defined(__apple_build_version__) && __clang_major__ >= 9) || \
2091 (!defined(__apple_build_version__) && defined(__clang__) && __clang_major__ >= 5) || \
2092 (!defined(__clang__) && defined(__GNUC__))) \
2093 && defined(__x86_64__)
2094
2095 #if !defined(__AVX2__)
2096 #if defined(__clang__)
2097 #pragma clang attribute push(__attribute__((target("avx2,f16c"))), apply_to=function)
2098 #elif defined(__GNUC__)
2099 #pragma GCC push_options
2100 #pragma GCC target("avx2,f16c")
2101 #endif
2102
2103 namespace hsw {
2104 #define USING_AVX
2105 #define USING_AVX_F16C
2106 #define USING_AVX2
2107 #define N 8
2108 template <typename T> using V = Vec<N,T>;
2109 using Color = float;
2110
2111 #include "src/Transform_inl.h"
2112
2113 // src/Transform_inl.h will undefine USING_* for us.
2114 #undef N
2115 }
2116
2117 #if defined(__clang__)
2118 #pragma clang attribute pop
2119 #elif defined(__GNUC__)
2120 #pragma GCC pop_options
2121 #endif
2122
2123 #define TEST_FOR_HSW
2124 #endif
2125
2126 #if !defined(__AVX512F__)
2127 #if defined(__clang__)
2128 #pragma clang attribute push(__attribute__((target("avx512f,avx512dq,avx512cd,avx512bw,avx512vl"))), apply_to=function)
2129 #elif defined(__GNUC__)
2130 #pragma GCC push_options
2131 #pragma GCC target("avx512f,avx512dq,avx512cd,avx512bw,avx512vl")
2132 #endif
2133
2134 namespace skx {
2135 #define USING_AVX512F
2136 #define N 16
2137 template <typename T> using V = Vec<N,T>;
2138 using Color = float;
2139
2140 #include "src/Transform_inl.h"
2141
2142 // src/Transform_inl.h will undefine USING_* for us.
2143 #undef N
2144 }
2145
2146 #if defined(__clang__)
2147 #pragma clang attribute pop
2148 #elif defined(__GNUC__)
2149 #pragma GCC pop_options
2150 #endif
2151
2152 #define TEST_FOR_SKX
2153 #endif
2154
2155 #if defined(TEST_FOR_HSW) || defined(TEST_FOR_SKX)
2156 enum class CpuType { None, HSW, SKX };
cpu_type()2157 static CpuType cpu_type() {
2158 static const CpuType type = []{
2159 // See http://www.sandpile.org/x86/cpuid.htm
2160
2161 // First, a basic cpuid(1) lets us check prerequisites for HSW, SKX.
2162 uint32_t eax, ebx, ecx, edx;
2163 __asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
2164 : "0"(1), "2"(0));
2165 if ((edx & (1u<<25)) && // SSE
2166 (edx & (1u<<26)) && // SSE2
2167 (ecx & (1u<< 0)) && // SSE3
2168 (ecx & (1u<< 9)) && // SSSE3
2169 (ecx & (1u<<12)) && // FMA (N.B. not used, avoided even)
2170 (ecx & (1u<<19)) && // SSE4.1
2171 (ecx & (1u<<20)) && // SSE4.2
2172 (ecx & (1u<<26)) && // XSAVE
2173 (ecx & (1u<<27)) && // OSXSAVE
2174 (ecx & (1u<<28)) && // AVX
2175 (ecx & (1u<<29))) { // F16C
2176
2177 // Call cpuid(7) to check for AVX2 and AVX-512 bits.
2178 __asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
2179 : "0"(7), "2"(0));
2180 // eax from xgetbv(0) will tell us whether XMM, YMM, and ZMM state is saved.
2181 uint32_t xcr0, dont_need_edx;
2182 __asm__ __volatile__("xgetbv" : "=a"(xcr0), "=d"(dont_need_edx) : "c"(0));
2183
2184 if ((xcr0 & (1u<<1)) && // XMM register state saved?
2185 (xcr0 & (1u<<2)) && // YMM register state saved?
2186 (ebx & (1u<<5))) { // AVX2
2187 // At this point we're at least HSW. Continue checking for SKX.
2188 if ((xcr0 & (1u<< 5)) && // Opmasks state saved?
2189 (xcr0 & (1u<< 6)) && // First 16 ZMM registers saved?
2190 (xcr0 & (1u<< 7)) && // High 16 ZMM registers saved?
2191 (ebx & (1u<<16)) && // AVX512F
2192 (ebx & (1u<<17)) && // AVX512DQ
2193 (ebx & (1u<<28)) && // AVX512CD
2194 (ebx & (1u<<30)) && // AVX512BW
2195 (ebx & (1u<<31))) { // AVX512VL
2196 return CpuType::SKX;
2197 }
2198 return CpuType::HSW;
2199 }
2200 }
2201 return CpuType::None;
2202 }();
2203 return type;
2204 }
2205 #endif
2206
2207 #endif
2208
2209 typedef struct {
2210 Op op;
2211 const void* arg;
2212 } OpAndArg;
2213
select_curve_op(const skcms_Curve * curve,int channel)2214 static OpAndArg select_curve_op(const skcms_Curve* curve, int channel) {
2215 static const struct { Op sRGBish, PQish, HLGish, HLGinvish, table; } ops[] = {
2216 { Op_tf_r, Op_pq_r, Op_hlg_r, Op_hlginv_r, Op_table_r },
2217 { Op_tf_g, Op_pq_g, Op_hlg_g, Op_hlginv_g, Op_table_g },
2218 { Op_tf_b, Op_pq_b, Op_hlg_b, Op_hlginv_b, Op_table_b },
2219 { Op_tf_a, Op_pq_a, Op_hlg_a, Op_hlginv_a, Op_table_a },
2220 };
2221 const auto& op = ops[channel];
2222
2223 if (curve->table_entries == 0) {
2224 const OpAndArg noop = { Op_load_a8/*doesn't matter*/, nullptr };
2225
2226 const skcms_TransferFunction& tf = curve->parametric;
2227
2228 if (tf.g == 1 && tf.a == 1 &&
2229 tf.b == 0 && tf.c == 0 && tf.d == 0 && tf.e == 0 && tf.f == 0) {
2230 return noop;
2231 }
2232
2233 switch (classify(tf)) {
2234 case Bad: return noop;
2235 case sRGBish: return OpAndArg{op.sRGBish, &tf};
2236 case PQish: return OpAndArg{op.PQish, &tf};
2237 case HLGish: return OpAndArg{op.HLGish, &tf};
2238 case HLGinvish: return OpAndArg{op.HLGinvish, &tf};
2239 }
2240 }
2241 return OpAndArg{op.table, curve};
2242 }
2243
bytes_per_pixel(skcms_PixelFormat fmt)2244 static size_t bytes_per_pixel(skcms_PixelFormat fmt) {
2245 switch (fmt >> 1) { // ignore rgb/bgr
2246 case skcms_PixelFormat_A_8 >> 1: return 1;
2247 case skcms_PixelFormat_G_8 >> 1: return 1;
2248 case skcms_PixelFormat_RGBA_8888_Palette8 >> 1: return 1;
2249 case skcms_PixelFormat_ABGR_4444 >> 1: return 2;
2250 case skcms_PixelFormat_RGB_565 >> 1: return 2;
2251 case skcms_PixelFormat_RGB_888 >> 1: return 3;
2252 case skcms_PixelFormat_RGBA_8888 >> 1: return 4;
2253 case skcms_PixelFormat_RGBA_8888_sRGB >> 1: return 4;
2254 case skcms_PixelFormat_RGBA_1010102 >> 1: return 4;
2255 case skcms_PixelFormat_RGB_161616LE >> 1: return 6;
2256 case skcms_PixelFormat_RGBA_16161616LE >> 1: return 8;
2257 case skcms_PixelFormat_RGB_161616BE >> 1: return 6;
2258 case skcms_PixelFormat_RGBA_16161616BE >> 1: return 8;
2259 case skcms_PixelFormat_RGB_hhh_Norm >> 1: return 6;
2260 case skcms_PixelFormat_RGBA_hhhh_Norm >> 1: return 8;
2261 case skcms_PixelFormat_RGB_hhh >> 1: return 6;
2262 case skcms_PixelFormat_RGBA_hhhh >> 1: return 8;
2263 case skcms_PixelFormat_RGB_fff >> 1: return 12;
2264 case skcms_PixelFormat_RGBA_ffff >> 1: return 16;
2265 }
2266 assert(false);
2267 return 0;
2268 }
2269
prep_for_destination(const skcms_ICCProfile * profile,skcms_Matrix3x3 * fromXYZD50,skcms_TransferFunction * invR,skcms_TransferFunction * invG,skcms_TransferFunction * invB)2270 static bool prep_for_destination(const skcms_ICCProfile* profile,
2271 skcms_Matrix3x3* fromXYZD50,
2272 skcms_TransferFunction* invR,
2273 skcms_TransferFunction* invG,
2274 skcms_TransferFunction* invB) {
2275 // We only support destinations with parametric transfer functions
2276 // and with gamuts that can be transformed from XYZD50.
2277 return profile->has_trc
2278 && profile->has_toXYZD50
2279 && profile->trc[0].table_entries == 0
2280 && profile->trc[1].table_entries == 0
2281 && profile->trc[2].table_entries == 0
2282 && skcms_TransferFunction_invert(&profile->trc[0].parametric, invR)
2283 && skcms_TransferFunction_invert(&profile->trc[1].parametric, invG)
2284 && skcms_TransferFunction_invert(&profile->trc[2].parametric, invB)
2285 && skcms_Matrix3x3_invert(&profile->toXYZD50, fromXYZD50);
2286 }
2287
skcms_Transform(const void * src,skcms_PixelFormat srcFmt,skcms_AlphaFormat srcAlpha,const skcms_ICCProfile * srcProfile,void * dst,skcms_PixelFormat dstFmt,skcms_AlphaFormat dstAlpha,const skcms_ICCProfile * dstProfile,size_t npixels)2288 bool skcms_Transform(const void* src,
2289 skcms_PixelFormat srcFmt,
2290 skcms_AlphaFormat srcAlpha,
2291 const skcms_ICCProfile* srcProfile,
2292 void* dst,
2293 skcms_PixelFormat dstFmt,
2294 skcms_AlphaFormat dstAlpha,
2295 const skcms_ICCProfile* dstProfile,
2296 size_t npixels) {
2297 return skcms_TransformWithPalette(src, srcFmt, srcAlpha, srcProfile,
2298 dst, dstFmt, dstAlpha, dstProfile,
2299 npixels, nullptr);
2300 }
2301
skcms_TransformWithPalette(const void * src,skcms_PixelFormat srcFmt,skcms_AlphaFormat srcAlpha,const skcms_ICCProfile * srcProfile,void * dst,skcms_PixelFormat dstFmt,skcms_AlphaFormat dstAlpha,const skcms_ICCProfile * dstProfile,size_t nz,const void * palette)2302 bool skcms_TransformWithPalette(const void* src,
2303 skcms_PixelFormat srcFmt,
2304 skcms_AlphaFormat srcAlpha,
2305 const skcms_ICCProfile* srcProfile,
2306 void* dst,
2307 skcms_PixelFormat dstFmt,
2308 skcms_AlphaFormat dstAlpha,
2309 const skcms_ICCProfile* dstProfile,
2310 size_t nz,
2311 const void* palette) {
2312 const size_t dst_bpp = bytes_per_pixel(dstFmt),
2313 src_bpp = bytes_per_pixel(srcFmt);
2314 // Let's just refuse if the request is absurdly big.
2315 if (nz * dst_bpp > INT_MAX || nz * src_bpp > INT_MAX) {
2316 return false;
2317 }
2318 int n = (int)nz;
2319
2320 // Null profiles default to sRGB. Passing null for both is handy when doing format conversion.
2321 if (!srcProfile) {
2322 srcProfile = skcms_sRGB_profile();
2323 }
2324 if (!dstProfile) {
2325 dstProfile = skcms_sRGB_profile();
2326 }
2327
2328 // We can't transform in place unless the PixelFormats are the same size.
2329 if (dst == src && dst_bpp != src_bpp) {
2330 return false;
2331 }
2332 // TODO: more careful alias rejection (like, dst == src + 1)?
2333
2334 if (needs_palette(srcFmt) && !palette) {
2335 return false;
2336 }
2337
2338 Op program [32];
2339 const void* arguments[32];
2340
2341 Op* ops = program;
2342 const void** args = arguments;
2343
2344 // These are always parametric curves of some sort.
2345 skcms_Curve dst_curves[3];
2346 dst_curves[0].table_entries =
2347 dst_curves[1].table_entries =
2348 dst_curves[2].table_entries = 0;
2349
2350 skcms_Matrix3x3 from_xyz;
2351
2352 switch (srcFmt >> 1) {
2353 default: return false;
2354 case skcms_PixelFormat_A_8 >> 1: *ops++ = Op_load_a8; break;
2355 case skcms_PixelFormat_G_8 >> 1: *ops++ = Op_load_g8; break;
2356 case skcms_PixelFormat_ABGR_4444 >> 1: *ops++ = Op_load_4444; break;
2357 case skcms_PixelFormat_RGB_565 >> 1: *ops++ = Op_load_565; break;
2358 case skcms_PixelFormat_RGB_888 >> 1: *ops++ = Op_load_888; break;
2359 case skcms_PixelFormat_RGBA_8888 >> 1: *ops++ = Op_load_8888; break;
2360 case skcms_PixelFormat_RGBA_1010102 >> 1: *ops++ = Op_load_1010102; break;
2361 case skcms_PixelFormat_RGB_161616LE >> 1: *ops++ = Op_load_161616LE; break;
2362 case skcms_PixelFormat_RGBA_16161616LE >> 1: *ops++ = Op_load_16161616LE; break;
2363 case skcms_PixelFormat_RGB_161616BE >> 1: *ops++ = Op_load_161616BE; break;
2364 case skcms_PixelFormat_RGBA_16161616BE >> 1: *ops++ = Op_load_16161616BE; break;
2365 case skcms_PixelFormat_RGB_hhh_Norm >> 1: *ops++ = Op_load_hhh; break;
2366 case skcms_PixelFormat_RGBA_hhhh_Norm >> 1: *ops++ = Op_load_hhhh; break;
2367 case skcms_PixelFormat_RGB_hhh >> 1: *ops++ = Op_load_hhh; break;
2368 case skcms_PixelFormat_RGBA_hhhh >> 1: *ops++ = Op_load_hhhh; break;
2369 case skcms_PixelFormat_RGB_fff >> 1: *ops++ = Op_load_fff; break;
2370 case skcms_PixelFormat_RGBA_ffff >> 1: *ops++ = Op_load_ffff; break;
2371
2372 case skcms_PixelFormat_RGBA_8888_Palette8 >> 1: *ops++ = Op_load_8888_palette8;
2373 *args++ = palette;
2374 break;
2375 case skcms_PixelFormat_RGBA_8888_sRGB >> 1:
2376 *ops++ = Op_load_8888;
2377 *ops++ = Op_tf_r; *args++ = skcms_sRGB_TransferFunction();
2378 *ops++ = Op_tf_g; *args++ = skcms_sRGB_TransferFunction();
2379 *ops++ = Op_tf_b; *args++ = skcms_sRGB_TransferFunction();
2380 break;
2381 }
2382 if (srcFmt == skcms_PixelFormat_RGB_hhh_Norm ||
2383 srcFmt == skcms_PixelFormat_RGBA_hhhh_Norm) {
2384 *ops++ = Op_clamp;
2385 }
2386 if (srcFmt & 1) {
2387 *ops++ = Op_swap_rb;
2388 }
2389 skcms_ICCProfile gray_dst_profile;
2390 if ((dstFmt >> 1) == (skcms_PixelFormat_G_8 >> 1)) {
2391 // When transforming to gray, stop at XYZ (by setting toXYZ to identity), then transform
2392 // luminance (Y) by the destination transfer function.
2393 gray_dst_profile = *dstProfile;
2394 skcms_SetXYZD50(&gray_dst_profile, &skcms_XYZD50_profile()->toXYZD50);
2395 dstProfile = &gray_dst_profile;
2396 }
2397
2398 if (srcProfile->data_color_space == skcms_Signature_CMYK) {
2399 // Photoshop creates CMYK images as inverse CMYK.
2400 // These happen to be the only ones we've _ever_ seen.
2401 *ops++ = Op_invert;
2402 // With CMYK, ignore the alpha type, to avoid changing K or conflating CMY with K.
2403 srcAlpha = skcms_AlphaFormat_Unpremul;
2404 }
2405
2406 if (srcAlpha == skcms_AlphaFormat_Opaque) {
2407 *ops++ = Op_force_opaque;
2408 } else if (srcAlpha == skcms_AlphaFormat_PremulAsEncoded) {
2409 *ops++ = Op_unpremul;
2410 }
2411
2412 if (dstProfile != srcProfile) {
2413
2414 if (!prep_for_destination(dstProfile,
2415 &from_xyz,
2416 &dst_curves[0].parametric,
2417 &dst_curves[1].parametric,
2418 &dst_curves[2].parametric)) {
2419 return false;
2420 }
2421
2422 if (srcProfile->has_A2B) {
2423 if (srcProfile->A2B.input_channels) {
2424 for (int i = 0; i < (int)srcProfile->A2B.input_channels; i++) {
2425 OpAndArg oa = select_curve_op(&srcProfile->A2B.input_curves[i], i);
2426 if (oa.arg) {
2427 *ops++ = oa.op;
2428 *args++ = oa.arg;
2429 }
2430 }
2431 *ops++ = Op_clamp;
2432 *ops++ = Op_clut;
2433 *args++ = &srcProfile->A2B;
2434 }
2435
2436 if (srcProfile->A2B.matrix_channels == 3) {
2437 for (int i = 0; i < 3; i++) {
2438 OpAndArg oa = select_curve_op(&srcProfile->A2B.matrix_curves[i], i);
2439 if (oa.arg) {
2440 *ops++ = oa.op;
2441 *args++ = oa.arg;
2442 }
2443 }
2444
2445 static const skcms_Matrix3x4 I = {{
2446 {1,0,0,0},
2447 {0,1,0,0},
2448 {0,0,1,0},
2449 }};
2450 if (0 != memcmp(&I, &srcProfile->A2B.matrix, sizeof(I))) {
2451 *ops++ = Op_matrix_3x4;
2452 *args++ = &srcProfile->A2B.matrix;
2453 }
2454 }
2455
2456 if (srcProfile->A2B.output_channels == 3) {
2457 for (int i = 0; i < 3; i++) {
2458 OpAndArg oa = select_curve_op(&srcProfile->A2B.output_curves[i], i);
2459 if (oa.arg) {
2460 *ops++ = oa.op;
2461 *args++ = oa.arg;
2462 }
2463 }
2464 }
2465
2466 if (srcProfile->pcs == skcms_Signature_Lab) {
2467 *ops++ = Op_lab_to_xyz;
2468 }
2469
2470 } else if (srcProfile->has_trc && srcProfile->has_toXYZD50) {
2471 for (int i = 0; i < 3; i++) {
2472 OpAndArg oa = select_curve_op(&srcProfile->trc[i], i);
2473 if (oa.arg) {
2474 *ops++ = oa.op;
2475 *args++ = oa.arg;
2476 }
2477 }
2478 } else {
2479 return false;
2480 }
2481
2482 // A2B sources should already be in XYZD50 at this point.
2483 // Others still need to be transformed using their toXYZD50 matrix.
2484 // N.B. There are profiles that contain both A2B tags and toXYZD50 matrices.
2485 // If we use the A2B tags, we need to ignore the XYZD50 matrix entirely.
2486 assert (srcProfile->has_A2B || srcProfile->has_toXYZD50);
2487 static const skcms_Matrix3x3 I = {{
2488 { 1.0f, 0.0f, 0.0f },
2489 { 0.0f, 1.0f, 0.0f },
2490 { 0.0f, 0.0f, 1.0f },
2491 }};
2492 const skcms_Matrix3x3* to_xyz = srcProfile->has_A2B ? &I : &srcProfile->toXYZD50;
2493
2494 // There's a chance the source and destination gamuts are identical,
2495 // in which case we can skip the gamut transform.
2496 if (0 != memcmp(&dstProfile->toXYZD50, to_xyz, sizeof(skcms_Matrix3x3))) {
2497 // Concat the entire gamut transform into from_xyz,
2498 // now slightly misnamed but it's a handy spot to stash the result.
2499 from_xyz = skcms_Matrix3x3_concat(&from_xyz, to_xyz);
2500 *ops++ = Op_matrix_3x3;
2501 *args++ = &from_xyz;
2502 }
2503
2504 // Encode back to dst RGB using its parametric transfer functions.
2505 for (int i = 0; i < 3; i++) {
2506 OpAndArg oa = select_curve_op(dst_curves+i, i);
2507 if (oa.arg) {
2508 assert (oa.op != Op_table_r &&
2509 oa.op != Op_table_g &&
2510 oa.op != Op_table_b &&
2511 oa.op != Op_table_a);
2512 *ops++ = oa.op;
2513 *args++ = oa.arg;
2514 }
2515 }
2516 }
2517
2518 // Clamp here before premul to make sure we're clamping to normalized values _and_ gamut,
2519 // not just to values that fit in [0,1].
2520 //
2521 // E.g. r = 1.1, a = 0.5 would fit fine in fixed point after premul (ra=0.55,a=0.5),
2522 // but would be carrying r > 1, which is really unexpected for downstream consumers.
2523 if (dstFmt < skcms_PixelFormat_RGB_hhh) {
2524 *ops++ = Op_clamp;
2525 }
2526 if (dstAlpha == skcms_AlphaFormat_Opaque) {
2527 *ops++ = Op_force_opaque;
2528 } else if (dstAlpha == skcms_AlphaFormat_PremulAsEncoded) {
2529 *ops++ = Op_premul;
2530 }
2531 if (dstFmt & 1) {
2532 *ops++ = Op_swap_rb;
2533 }
2534 switch (dstFmt >> 1) {
2535 default: return false;
2536 case skcms_PixelFormat_A_8 >> 1: *ops++ = Op_store_a8; break;
2537 case skcms_PixelFormat_G_8 >> 1: *ops++ = Op_store_g8; break;
2538 case skcms_PixelFormat_ABGR_4444 >> 1: *ops++ = Op_store_4444; break;
2539 case skcms_PixelFormat_RGB_565 >> 1: *ops++ = Op_store_565; break;
2540 case skcms_PixelFormat_RGB_888 >> 1: *ops++ = Op_store_888; break;
2541 case skcms_PixelFormat_RGBA_8888 >> 1: *ops++ = Op_store_8888; break;
2542 case skcms_PixelFormat_RGBA_1010102 >> 1: *ops++ = Op_store_1010102; break;
2543 case skcms_PixelFormat_RGB_161616LE >> 1: *ops++ = Op_store_161616LE; break;
2544 case skcms_PixelFormat_RGBA_16161616LE >> 1: *ops++ = Op_store_16161616LE; break;
2545 case skcms_PixelFormat_RGB_161616BE >> 1: *ops++ = Op_store_161616BE; break;
2546 case skcms_PixelFormat_RGBA_16161616BE >> 1: *ops++ = Op_store_16161616BE; break;
2547 case skcms_PixelFormat_RGB_hhh_Norm >> 1: *ops++ = Op_store_hhh; break;
2548 case skcms_PixelFormat_RGBA_hhhh_Norm >> 1: *ops++ = Op_store_hhhh; break;
2549 case skcms_PixelFormat_RGB_hhh >> 1: *ops++ = Op_store_hhh; break;
2550 case skcms_PixelFormat_RGBA_hhhh >> 1: *ops++ = Op_store_hhhh; break;
2551 case skcms_PixelFormat_RGB_fff >> 1: *ops++ = Op_store_fff; break;
2552 case skcms_PixelFormat_RGBA_ffff >> 1: *ops++ = Op_store_ffff; break;
2553
2554 case skcms_PixelFormat_RGBA_8888_sRGB >> 1:
2555 *ops++ = Op_tf_r; *args++ = skcms_sRGB_Inverse_TransferFunction();
2556 *ops++ = Op_tf_g; *args++ = skcms_sRGB_Inverse_TransferFunction();
2557 *ops++ = Op_tf_b; *args++ = skcms_sRGB_Inverse_TransferFunction();
2558 *ops++ = Op_store_8888;
2559 break;
2560 }
2561
2562 auto run = baseline::run_program;
2563 #if defined(TEST_FOR_HSW)
2564 switch (cpu_type()) {
2565 case CpuType::None: break;
2566 case CpuType::HSW: run = hsw::run_program; break;
2567 case CpuType::SKX: run = hsw::run_program; break;
2568 }
2569 #endif
2570 #if defined(TEST_FOR_SKX)
2571 switch (cpu_type()) {
2572 case CpuType::None: break;
2573 case CpuType::HSW: break;
2574 case CpuType::SKX: run = skx::run_program; break;
2575 }
2576 #endif
2577 run(program, arguments, (const char*)src, (char*)dst, n, src_bpp,dst_bpp);
2578 return true;
2579 }
2580
assert_usable_as_destination(const skcms_ICCProfile * profile)2581 static void assert_usable_as_destination(const skcms_ICCProfile* profile) {
2582 #if defined(NDEBUG)
2583 (void)profile;
2584 #else
2585 skcms_Matrix3x3 fromXYZD50;
2586 skcms_TransferFunction invR, invG, invB;
2587 assert(prep_for_destination(profile, &fromXYZD50, &invR, &invG, &invB));
2588 #endif
2589 }
2590
skcms_MakeUsableAsDestination(skcms_ICCProfile * profile)2591 bool skcms_MakeUsableAsDestination(skcms_ICCProfile* profile) {
2592 skcms_Matrix3x3 fromXYZD50;
2593 if (!profile->has_trc || !profile->has_toXYZD50
2594 || !skcms_Matrix3x3_invert(&profile->toXYZD50, &fromXYZD50)) {
2595 return false;
2596 }
2597
2598 skcms_TransferFunction tf[3];
2599 for (int i = 0; i < 3; i++) {
2600 skcms_TransferFunction inv;
2601 if (profile->trc[i].table_entries == 0
2602 && skcms_TransferFunction_invert(&profile->trc[i].parametric, &inv)) {
2603 tf[i] = profile->trc[i].parametric;
2604 continue;
2605 }
2606
2607 float max_error;
2608 // Parametric curves from skcms_ApproximateCurve() are guaranteed to be invertible.
2609 if (!skcms_ApproximateCurve(&profile->trc[i], &tf[i], &max_error)) {
2610 return false;
2611 }
2612 }
2613
2614 for (int i = 0; i < 3; ++i) {
2615 profile->trc[i].table_entries = 0;
2616 profile->trc[i].parametric = tf[i];
2617 }
2618
2619 assert_usable_as_destination(profile);
2620 return true;
2621 }
2622
skcms_MakeUsableAsDestinationWithSingleCurve(skcms_ICCProfile * profile)2623 bool skcms_MakeUsableAsDestinationWithSingleCurve(skcms_ICCProfile* profile) {
2624 // Operate on a copy of profile, so we can choose the best TF for the original curves
2625 skcms_ICCProfile result = *profile;
2626 if (!skcms_MakeUsableAsDestination(&result)) {
2627 return false;
2628 }
2629
2630 int best_tf = 0;
2631 float min_max_error = INFINITY_;
2632 for (int i = 0; i < 3; i++) {
2633 skcms_TransferFunction inv;
2634 if (!skcms_TransferFunction_invert(&result.trc[i].parametric, &inv)) {
2635 return false;
2636 }
2637
2638 float err = 0;
2639 for (int j = 0; j < 3; ++j) {
2640 err = fmaxf_(err, skcms_MaxRoundtripError(&profile->trc[j], &inv));
2641 }
2642 if (min_max_error > err) {
2643 min_max_error = err;
2644 best_tf = i;
2645 }
2646 }
2647
2648 for (int i = 0; i < 3; i++) {
2649 result.trc[i].parametric = result.trc[best_tf].parametric;
2650 }
2651
2652 *profile = result;
2653 assert_usable_as_destination(profile);
2654 return true;
2655 }
2656