1 /*
2 * Copyright 2018 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #include "skcms.h"
9 #include "skcms_internal.h"
10 #include <assert.h>
11 #include <float.h>
12 #include <limits.h>
13 #include <stdlib.h>
14 #include <string.h>
15
16 #if defined(__ARM_NEON)
17 #include <arm_neon.h>
18 #elif defined(__SSE__)
19 #include <immintrin.h>
20
21 #if defined(__clang__)
22 // That #include <immintrin.h> is usually enough, but Clang's headers
23 // "helpfully" skip including the whole kitchen sink when _MSC_VER is
24 // defined, because lots of programs on Windows would include that and
25 // it'd be a lot slower. But we want all those headers included so we
26 // can use their features after runtime checks later.
27 #include <smmintrin.h>
28 #include <avxintrin.h>
29 #include <avx2intrin.h>
30 #include <avx512fintrin.h>
31 #include <avx512dqintrin.h>
32 #endif
33 #else
34 #define SKCMS_PORTABLE
35 #endif
36
37 // sizeof(x) will return size_t, which is 32-bit on some machines and 64-bit on others.
38 // We have better testing on 64-bit machines, so force 32-bit machines to behave like 64-bit.
39 //
40 // Please do not use sizeof() directly, and size_t only when required.
41 // (We have no way of enforcing these requests...)
42 #define SAFE_SIZEOF(x) ((uint64_t)sizeof(x))
43
44 // Same sort of thing for _Layout structs with a variable sized array at the end (named "variable").
45 #define SAFE_FIXED_SIZE(type) ((uint64_t)offsetof(type, variable))
46
47 static const union {
48 uint32_t bits;
49 float f;
50 } inf_ = { 0x7f800000 };
51 #define INFINITY_ inf_.f
52
53 #if defined(__clang__) || defined(__GNUC__)
54 #define small_memcpy __builtin_memcpy
55 #else
56 #define small_memcpy memcpy
57 #endif
58
log2f_(float x)59 static float log2f_(float x) {
60 // The first approximation of log2(x) is its exponent 'e', minus 127.
61 int32_t bits;
62 small_memcpy(&bits, &x, sizeof(bits));
63
64 float e = (float)bits * (1.0f / (1<<23));
65
66 // If we use the mantissa too we can refine the error signficantly.
67 int32_t m_bits = (bits & 0x007fffff) | 0x3f000000;
68 float m;
69 small_memcpy(&m, &m_bits, sizeof(m));
70
71 return (e - 124.225514990f
72 - 1.498030302f*m
73 - 1.725879990f/(0.3520887068f + m));
74 }
logf_(float x)75 static float logf_(float x) {
76 const float ln2 = 0.69314718f;
77 return ln2*log2f_(x);
78 }
79
exp2f_(float x)80 static float exp2f_(float x) {
81 float fract = x - floorf_(x);
82
83 float fbits = (1.0f * (1<<23)) * (x + 121.274057500f
84 - 1.490129070f*fract
85 + 27.728023300f/(4.84252568f - fract));
86
87 // Before we cast fbits to int32_t, check for out of range values to pacify UBSAN.
88 // INT_MAX is not exactly representable as a float, so exclude it as effectively infinite.
89 // INT_MIN is a power of 2 and exactly representable as a float, so it's fine.
90 if (fbits >= (float)INT_MAX) {
91 return INFINITY_;
92 } else if (fbits < (float)INT_MIN) {
93 return -INFINITY_;
94 }
95
96 int32_t bits = (int32_t)fbits;
97 small_memcpy(&x, &bits, sizeof(x));
98 return x;
99 }
100
101 // Not static, as it's used by some test tools.
powf_(float x,float y)102 float powf_(float x, float y) {
103 assert (x >= 0);
104 return (x == 0) || (x == 1) ? x
105 : exp2f_(log2f_(x) * y);
106 }
107
expf_(float x)108 static float expf_(float x) {
109 const float log2_e = 1.4426950408889634074f;
110 return exp2f_(log2_e * x);
111 }
112
fmaxf_(float x,float y)113 static float fmaxf_(float x, float y) { return x > y ? x : y; }
fminf_(float x,float y)114 static float fminf_(float x, float y) { return x < y ? x : y; }
115
isfinitef_(float x)116 static bool isfinitef_(float x) { return 0 == x*0; }
117
minus_1_ulp(float x)118 static float minus_1_ulp(float x) {
119 int32_t bits;
120 memcpy(&bits, &x, sizeof(bits));
121 bits = bits - 1;
122 memcpy(&x, &bits, sizeof(bits));
123 return x;
124 }
125
126 // Most transfer functions we work with are sRGBish.
127 // For exotic HDR transfer functions, we encode them using a tf.g that makes no sense,
128 // and repurpose the other fields to hold the parameters of the HDR functions.
129 enum TFKind { Bad, sRGBish, PQish, HLGish, HLGinvish };
130 struct TF_PQish { float A,B,C,D,E,F; };
131 struct TF_HLGish { float R,G,a,b,c; };
132
TFKind_marker(TFKind kind)133 static float TFKind_marker(TFKind kind) {
134 // We'd use different NaNs, but those aren't guaranteed to be preserved by WASM.
135 return -(float)kind;
136 }
137
classify(const skcms_TransferFunction & tf,TF_PQish * pq=nullptr,TF_HLGish * hlg=nullptr)138 static TFKind classify(const skcms_TransferFunction& tf, TF_PQish* pq = nullptr
139 , TF_HLGish* hlg = nullptr) {
140 if (tf.g < 0 && (int)tf.g == tf.g) {
141 // TODO: sanity checks for PQ/HLG like we do for sRGBish.
142 switch (-(int)tf.g) {
143 case PQish: if (pq ) { memcpy(pq , &tf.a, sizeof(*pq )); } return PQish;
144 case HLGish: if (hlg) { memcpy(hlg, &tf.a, sizeof(*hlg)); } return HLGish;
145 case HLGinvish: if (hlg) { memcpy(hlg, &tf.a, sizeof(*hlg)); } return HLGinvish;
146 }
147 return Bad;
148 }
149
150 // Basic sanity checks for sRGBish transfer functions.
151 if (isfinitef_(tf.a + tf.b + tf.c + tf.d + tf.e + tf.f + tf.g)
152 // a,c,d,g should be non-negative to make any sense.
153 && tf.a >= 0
154 && tf.c >= 0
155 && tf.d >= 0
156 && tf.g >= 0
157 // Raising a negative value to a fractional tf->g produces complex numbers.
158 && tf.a * tf.d + tf.b >= 0) {
159 return sRGBish;
160 }
161
162 return Bad;
163 }
164
skcms_TransferFunction_makePQish(skcms_TransferFunction * tf,float A,float B,float C,float D,float E,float F)165 bool skcms_TransferFunction_makePQish(skcms_TransferFunction* tf,
166 float A, float B, float C,
167 float D, float E, float F) {
168 *tf = { TFKind_marker(PQish), A,B,C,D,E,F };
169 assert(classify(*tf) == PQish);
170 return true;
171 }
172
skcms_TransferFunction_makeHLGish(skcms_TransferFunction * tf,float R,float G,float a,float b,float c)173 bool skcms_TransferFunction_makeHLGish(skcms_TransferFunction* tf,
174 float R, float G,
175 float a, float b, float c) {
176 *tf = { TFKind_marker(HLGish), R,G, a,b,c, 0 };
177 assert(classify(*tf) == HLGish);
178 return true;
179 }
180
skcms_TransferFunction_eval(const skcms_TransferFunction * tf,float x)181 float skcms_TransferFunction_eval(const skcms_TransferFunction* tf, float x) {
182 float sign = x < 0 ? -1.0f : 1.0f;
183 x *= sign;
184
185 TF_PQish pq;
186 TF_HLGish hlg;
187 switch (classify(*tf, &pq, &hlg)) {
188 case Bad: break;
189
190 case HLGish: return sign * (x*hlg.R <= 1 ? powf_(x*hlg.R, hlg.G)
191 : expf_((x-hlg.c)*hlg.a) + hlg.b);
192
193 // skcms_TransferFunction_invert() inverts R, G, and a for HLGinvish so this math is fast.
194 case HLGinvish: return sign * (x <= 1 ? hlg.R * powf_(x, hlg.G)
195 : hlg.a * logf_(x - hlg.b) + hlg.c);
196
197
198 case sRGBish: return sign * (x < tf->d ? tf->c * x + tf->f
199 : powf_(tf->a * x + tf->b, tf->g) + tf->e);
200
201 case PQish: return sign * powf_(fmaxf_(pq.A + pq.B * powf_(x, pq.C), 0)
202 / (pq.D + pq.E * powf_(x, pq.C)),
203 pq.F);
204 }
205 return 0;
206 }
207
208
eval_curve(const skcms_Curve * curve,float x)209 static float eval_curve(const skcms_Curve* curve, float x) {
210 if (curve->table_entries == 0) {
211 return skcms_TransferFunction_eval(&curve->parametric, x);
212 }
213
214 float ix = fmaxf_(0, fminf_(x, 1)) * (curve->table_entries - 1);
215 int lo = (int) ix ,
216 hi = (int)(float)minus_1_ulp(ix + 1.0f);
217 float t = ix - (float)lo;
218
219 float l, h;
220 if (curve->table_8) {
221 l = curve->table_8[lo] * (1/255.0f);
222 h = curve->table_8[hi] * (1/255.0f);
223 } else {
224 uint16_t be_l, be_h;
225 memcpy(&be_l, curve->table_16 + 2*lo, 2);
226 memcpy(&be_h, curve->table_16 + 2*hi, 2);
227 uint16_t le_l = ((be_l << 8) | (be_l >> 8)) & 0xffff;
228 uint16_t le_h = ((be_h << 8) | (be_h >> 8)) & 0xffff;
229 l = le_l * (1/65535.0f);
230 h = le_h * (1/65535.0f);
231 }
232 return l + (h-l)*t;
233 }
234
max_roundtrip_error(const skcms_Curve * curve,const skcms_TransferFunction * inv_tf)235 static float max_roundtrip_error(const skcms_Curve* curve, const skcms_TransferFunction* inv_tf) {
236 uint32_t N = curve->table_entries > 256 ? curve->table_entries : 256;
237 const float dx = 1.0f / (N - 1);
238 float err = 0;
239 for (uint32_t i = 0; i < N; i++) {
240 float x = i * dx,
241 y = eval_curve(curve, x);
242 err = fmaxf_(err, fabsf_(x - skcms_TransferFunction_eval(inv_tf, y)));
243 }
244 return err;
245 }
246
skcms_AreApproximateInverses(const skcms_Curve * curve,const skcms_TransferFunction * inv_tf)247 bool skcms_AreApproximateInverses(const skcms_Curve* curve, const skcms_TransferFunction* inv_tf) {
248 return max_roundtrip_error(curve, inv_tf) < (1/512.0f);
249 }
250
251 // Additional ICC signature values that are only used internally
252 enum {
253 // File signature
254 skcms_Signature_acsp = 0x61637370,
255
256 // Tag signatures
257 skcms_Signature_rTRC = 0x72545243,
258 skcms_Signature_gTRC = 0x67545243,
259 skcms_Signature_bTRC = 0x62545243,
260 skcms_Signature_kTRC = 0x6B545243,
261
262 skcms_Signature_rXYZ = 0x7258595A,
263 skcms_Signature_gXYZ = 0x6758595A,
264 skcms_Signature_bXYZ = 0x6258595A,
265
266 skcms_Signature_A2B0 = 0x41324230,
267 skcms_Signature_A2B1 = 0x41324231,
268 skcms_Signature_mAB = 0x6D414220,
269
270 skcms_Signature_CHAD = 0x63686164,
271
272 // Type signatures
273 skcms_Signature_curv = 0x63757276,
274 skcms_Signature_mft1 = 0x6D667431,
275 skcms_Signature_mft2 = 0x6D667432,
276 skcms_Signature_para = 0x70617261,
277 skcms_Signature_sf32 = 0x73663332,
278 // XYZ is also a PCS signature, so it's defined in skcms.h
279 // skcms_Signature_XYZ = 0x58595A20,
280 };
281
read_big_u16(const uint8_t * ptr)282 static uint16_t read_big_u16(const uint8_t* ptr) {
283 uint16_t be;
284 memcpy(&be, ptr, sizeof(be));
285 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
286 return be;
287 #else
288 #if defined(_MSC_VER)
289 return _byteswap_ushort(be);
290 #else
291 return __builtin_bswap16(be);
292 #endif
293 #endif
294 }
295
read_big_u32(const uint8_t * ptr)296 static uint32_t read_big_u32(const uint8_t* ptr) {
297 uint32_t be;
298 memcpy(&be, ptr, sizeof(be));
299 #if __BYTE_ORDER == __ORDER_BIG_ENDIAN__
300 return be;
301 #else
302 #if defined(_MSC_VER)
303 return _byteswap_ulong(be);
304 #else
305 return __builtin_bswap32(be);
306 #endif
307 #endif
308 }
309
read_big_i32(const uint8_t * ptr)310 static int32_t read_big_i32(const uint8_t* ptr) {
311 return (int32_t)read_big_u32(ptr);
312 }
313
read_big_fixed(const uint8_t * ptr)314 static float read_big_fixed(const uint8_t* ptr) {
315 return read_big_i32(ptr) * (1.0f / 65536.0f);
316 }
317
318 // Maps to an in-memory profile so that fields line up to the locations specified
319 // in ICC.1:2010, section 7.2
320 typedef struct {
321 uint8_t size [ 4];
322 uint8_t cmm_type [ 4];
323 uint8_t version [ 4];
324 uint8_t profile_class [ 4];
325 uint8_t data_color_space [ 4];
326 uint8_t pcs [ 4];
327 uint8_t creation_date_time [12];
328 uint8_t signature [ 4];
329 uint8_t platform [ 4];
330 uint8_t flags [ 4];
331 uint8_t device_manufacturer [ 4];
332 uint8_t device_model [ 4];
333 uint8_t device_attributes [ 8];
334 uint8_t rendering_intent [ 4];
335 uint8_t illuminant_X [ 4];
336 uint8_t illuminant_Y [ 4];
337 uint8_t illuminant_Z [ 4];
338 uint8_t creator [ 4];
339 uint8_t profile_id [16];
340 uint8_t reserved [28];
341 uint8_t tag_count [ 4]; // Technically not part of header, but required
342 } header_Layout;
343
344 typedef struct {
345 uint8_t signature [4];
346 uint8_t offset [4];
347 uint8_t size [4];
348 } tag_Layout;
349
get_tag_table(const skcms_ICCProfile * profile)350 static const tag_Layout* get_tag_table(const skcms_ICCProfile* profile) {
351 return (const tag_Layout*)(profile->buffer + SAFE_SIZEOF(header_Layout));
352 }
353
354 // s15Fixed16ArrayType is technically variable sized, holding N values. However, the only valid
355 // use of the type is for the CHAD tag that stores exactly nine values.
356 typedef struct {
357 uint8_t type [ 4];
358 uint8_t reserved [ 4];
359 uint8_t values [36];
360 } sf32_Layout;
361
skcms_GetCHAD(const skcms_ICCProfile * profile,skcms_Matrix3x3 * m)362 bool skcms_GetCHAD(const skcms_ICCProfile* profile, skcms_Matrix3x3* m) {
363 skcms_ICCTag tag;
364 if (!skcms_GetTagBySignature(profile, skcms_Signature_CHAD, &tag)) {
365 return false;
366 }
367
368 if (tag.type != skcms_Signature_sf32 || tag.size < SAFE_SIZEOF(sf32_Layout)) {
369 return false;
370 }
371
372 const sf32_Layout* sf32Tag = (const sf32_Layout*)tag.buf;
373 const uint8_t* values = sf32Tag->values;
374 for (int r = 0; r < 3; ++r)
375 for (int c = 0; c < 3; ++c, values += 4) {
376 m->vals[r][c] = read_big_fixed(values);
377 }
378 return true;
379 }
380
381 // XYZType is technically variable sized, holding N XYZ triples. However, the only valid uses of
382 // the type are for tags/data that store exactly one triple.
383 typedef struct {
384 uint8_t type [4];
385 uint8_t reserved [4];
386 uint8_t X [4];
387 uint8_t Y [4];
388 uint8_t Z [4];
389 } XYZ_Layout;
390
read_tag_xyz(const skcms_ICCTag * tag,float * x,float * y,float * z)391 static bool read_tag_xyz(const skcms_ICCTag* tag, float* x, float* y, float* z) {
392 if (tag->type != skcms_Signature_XYZ || tag->size < SAFE_SIZEOF(XYZ_Layout)) {
393 return false;
394 }
395
396 const XYZ_Layout* xyzTag = (const XYZ_Layout*)tag->buf;
397
398 *x = read_big_fixed(xyzTag->X);
399 *y = read_big_fixed(xyzTag->Y);
400 *z = read_big_fixed(xyzTag->Z);
401 return true;
402 }
403
read_to_XYZD50(const skcms_ICCTag * rXYZ,const skcms_ICCTag * gXYZ,const skcms_ICCTag * bXYZ,skcms_Matrix3x3 * toXYZ)404 static bool read_to_XYZD50(const skcms_ICCTag* rXYZ, const skcms_ICCTag* gXYZ,
405 const skcms_ICCTag* bXYZ, skcms_Matrix3x3* toXYZ) {
406 return read_tag_xyz(rXYZ, &toXYZ->vals[0][0], &toXYZ->vals[1][0], &toXYZ->vals[2][0]) &&
407 read_tag_xyz(gXYZ, &toXYZ->vals[0][1], &toXYZ->vals[1][1], &toXYZ->vals[2][1]) &&
408 read_tag_xyz(bXYZ, &toXYZ->vals[0][2], &toXYZ->vals[1][2], &toXYZ->vals[2][2]);
409 }
410
411 typedef struct {
412 uint8_t type [4];
413 uint8_t reserved_a [4];
414 uint8_t function_type [2];
415 uint8_t reserved_b [2];
416 uint8_t variable [1/*variable*/]; // 1, 3, 4, 5, or 7 s15.16, depending on function_type
417 } para_Layout;
418
read_curve_para(const uint8_t * buf,uint32_t size,skcms_Curve * curve,uint32_t * curve_size)419 static bool read_curve_para(const uint8_t* buf, uint32_t size,
420 skcms_Curve* curve, uint32_t* curve_size) {
421 if (size < SAFE_FIXED_SIZE(para_Layout)) {
422 return false;
423 }
424
425 const para_Layout* paraTag = (const para_Layout*)buf;
426
427 enum { kG = 0, kGAB = 1, kGABC = 2, kGABCD = 3, kGABCDEF = 4 };
428 uint16_t function_type = read_big_u16(paraTag->function_type);
429 if (function_type > kGABCDEF) {
430 return false;
431 }
432
433 static const uint32_t curve_bytes[] = { 4, 12, 16, 20, 28 };
434 if (size < SAFE_FIXED_SIZE(para_Layout) + curve_bytes[function_type]) {
435 return false;
436 }
437
438 if (curve_size) {
439 *curve_size = SAFE_FIXED_SIZE(para_Layout) + curve_bytes[function_type];
440 }
441
442 curve->table_entries = 0;
443 curve->parametric.a = 1.0f;
444 curve->parametric.b = 0.0f;
445 curve->parametric.c = 0.0f;
446 curve->parametric.d = 0.0f;
447 curve->parametric.e = 0.0f;
448 curve->parametric.f = 0.0f;
449 curve->parametric.g = read_big_fixed(paraTag->variable);
450
451 switch (function_type) {
452 case kGAB:
453 curve->parametric.a = read_big_fixed(paraTag->variable + 4);
454 curve->parametric.b = read_big_fixed(paraTag->variable + 8);
455 if (curve->parametric.a == 0) {
456 return false;
457 }
458 curve->parametric.d = -curve->parametric.b / curve->parametric.a;
459 break;
460 case kGABC:
461 curve->parametric.a = read_big_fixed(paraTag->variable + 4);
462 curve->parametric.b = read_big_fixed(paraTag->variable + 8);
463 curve->parametric.e = read_big_fixed(paraTag->variable + 12);
464 if (curve->parametric.a == 0) {
465 return false;
466 }
467 curve->parametric.d = -curve->parametric.b / curve->parametric.a;
468 curve->parametric.f = curve->parametric.e;
469 break;
470 case kGABCD:
471 curve->parametric.a = read_big_fixed(paraTag->variable + 4);
472 curve->parametric.b = read_big_fixed(paraTag->variable + 8);
473 curve->parametric.c = read_big_fixed(paraTag->variable + 12);
474 curve->parametric.d = read_big_fixed(paraTag->variable + 16);
475 break;
476 case kGABCDEF:
477 curve->parametric.a = read_big_fixed(paraTag->variable + 4);
478 curve->parametric.b = read_big_fixed(paraTag->variable + 8);
479 curve->parametric.c = read_big_fixed(paraTag->variable + 12);
480 curve->parametric.d = read_big_fixed(paraTag->variable + 16);
481 curve->parametric.e = read_big_fixed(paraTag->variable + 20);
482 curve->parametric.f = read_big_fixed(paraTag->variable + 24);
483 break;
484 }
485 return classify(curve->parametric) == sRGBish;
486 }
487
488 typedef struct {
489 uint8_t type [4];
490 uint8_t reserved [4];
491 uint8_t value_count [4];
492 uint8_t variable [1/*variable*/]; // value_count, 8.8 if 1, uint16 (n*65535) if > 1
493 } curv_Layout;
494
read_curve_curv(const uint8_t * buf,uint32_t size,skcms_Curve * curve,uint32_t * curve_size)495 static bool read_curve_curv(const uint8_t* buf, uint32_t size,
496 skcms_Curve* curve, uint32_t* curve_size) {
497 if (size < SAFE_FIXED_SIZE(curv_Layout)) {
498 return false;
499 }
500
501 const curv_Layout* curvTag = (const curv_Layout*)buf;
502
503 uint32_t value_count = read_big_u32(curvTag->value_count);
504 if (size < SAFE_FIXED_SIZE(curv_Layout) + value_count * SAFE_SIZEOF(uint16_t)) {
505 return false;
506 }
507
508 if (curve_size) {
509 *curve_size = SAFE_FIXED_SIZE(curv_Layout) + value_count * SAFE_SIZEOF(uint16_t);
510 }
511
512 if (value_count < 2) {
513 curve->table_entries = 0;
514 curve->parametric.a = 1.0f;
515 curve->parametric.b = 0.0f;
516 curve->parametric.c = 0.0f;
517 curve->parametric.d = 0.0f;
518 curve->parametric.e = 0.0f;
519 curve->parametric.f = 0.0f;
520 if (value_count == 0) {
521 // Empty tables are a shorthand for an identity curve
522 curve->parametric.g = 1.0f;
523 } else {
524 // Single entry tables are a shorthand for simple gamma
525 curve->parametric.g = read_big_u16(curvTag->variable) * (1.0f / 256.0f);
526 }
527 } else {
528 curve->table_8 = nullptr;
529 curve->table_16 = curvTag->variable;
530 curve->table_entries = value_count;
531 }
532
533 return true;
534 }
535
536 // Parses both curveType and parametricCurveType data. Ensures that at most 'size' bytes are read.
537 // If curve_size is not nullptr, writes the number of bytes used by the curve in (*curve_size).
read_curve(const uint8_t * buf,uint32_t size,skcms_Curve * curve,uint32_t * curve_size)538 static bool read_curve(const uint8_t* buf, uint32_t size,
539 skcms_Curve* curve, uint32_t* curve_size) {
540 if (!buf || size < 4 || !curve) {
541 return false;
542 }
543
544 uint32_t type = read_big_u32(buf);
545 if (type == skcms_Signature_para) {
546 return read_curve_para(buf, size, curve, curve_size);
547 } else if (type == skcms_Signature_curv) {
548 return read_curve_curv(buf, size, curve, curve_size);
549 }
550
551 return false;
552 }
553
554 // mft1 and mft2 share a large chunk of data
555 typedef struct {
556 uint8_t type [ 4];
557 uint8_t reserved_a [ 4];
558 uint8_t input_channels [ 1];
559 uint8_t output_channels [ 1];
560 uint8_t grid_points [ 1];
561 uint8_t reserved_b [ 1];
562 uint8_t matrix [36];
563 } mft_CommonLayout;
564
565 typedef struct {
566 mft_CommonLayout common [1];
567
568 uint8_t variable [1/*variable*/];
569 } mft1_Layout;
570
571 typedef struct {
572 mft_CommonLayout common [1];
573
574 uint8_t input_table_entries [2];
575 uint8_t output_table_entries [2];
576 uint8_t variable [1/*variable*/];
577 } mft2_Layout;
578
read_mft_common(const mft_CommonLayout * mftTag,skcms_A2B * a2b)579 static bool read_mft_common(const mft_CommonLayout* mftTag, skcms_A2B* a2b) {
580 // MFT matrices are applied before the first set of curves, but must be identity unless the
581 // input is PCSXYZ. We don't support PCSXYZ profiles, so we ignore this matrix. Note that the
582 // matrix in skcms_A2B is applied later in the pipe, so supporting this would require another
583 // field/flag.
584 a2b->matrix_channels = 0;
585
586 a2b->input_channels = mftTag->input_channels[0];
587 a2b->output_channels = mftTag->output_channels[0];
588
589 // We require exactly three (ie XYZ/Lab/RGB) output channels
590 if (a2b->output_channels != ARRAY_COUNT(a2b->output_curves)) {
591 return false;
592 }
593 // We require at least one, and no more than four (ie CMYK) input channels
594 if (a2b->input_channels < 1 || a2b->input_channels > ARRAY_COUNT(a2b->input_curves)) {
595 return false;
596 }
597
598 for (uint32_t i = 0; i < a2b->input_channels; ++i) {
599 a2b->grid_points[i] = mftTag->grid_points[0];
600 }
601 // The grid only makes sense with at least two points along each axis
602 if (a2b->grid_points[0] < 2) {
603 return false;
604 }
605
606 return true;
607 }
608
init_a2b_tables(const uint8_t * table_base,uint64_t max_tables_len,uint32_t byte_width,uint32_t input_table_entries,uint32_t output_table_entries,skcms_A2B * a2b)609 static bool init_a2b_tables(const uint8_t* table_base, uint64_t max_tables_len, uint32_t byte_width,
610 uint32_t input_table_entries, uint32_t output_table_entries,
611 skcms_A2B* a2b) {
612 // byte_width is 1 or 2, [input|output]_table_entries are in [2, 4096], so no overflow
613 uint32_t byte_len_per_input_table = input_table_entries * byte_width;
614 uint32_t byte_len_per_output_table = output_table_entries * byte_width;
615
616 // [input|output]_channels are <= 4, so still no overflow
617 uint32_t byte_len_all_input_tables = a2b->input_channels * byte_len_per_input_table;
618 uint32_t byte_len_all_output_tables = a2b->output_channels * byte_len_per_output_table;
619
620 uint64_t grid_size = a2b->output_channels * byte_width;
621 for (uint32_t axis = 0; axis < a2b->input_channels; ++axis) {
622 grid_size *= a2b->grid_points[axis];
623 }
624
625 if (max_tables_len < byte_len_all_input_tables + grid_size + byte_len_all_output_tables) {
626 return false;
627 }
628
629 for (uint32_t i = 0; i < a2b->input_channels; ++i) {
630 a2b->input_curves[i].table_entries = input_table_entries;
631 if (byte_width == 1) {
632 a2b->input_curves[i].table_8 = table_base + i * byte_len_per_input_table;
633 a2b->input_curves[i].table_16 = nullptr;
634 } else {
635 a2b->input_curves[i].table_8 = nullptr;
636 a2b->input_curves[i].table_16 = table_base + i * byte_len_per_input_table;
637 }
638 }
639
640 if (byte_width == 1) {
641 a2b->grid_8 = table_base + byte_len_all_input_tables;
642 a2b->grid_16 = nullptr;
643 } else {
644 a2b->grid_8 = nullptr;
645 a2b->grid_16 = table_base + byte_len_all_input_tables;
646 }
647
648 const uint8_t* output_table_base = table_base + byte_len_all_input_tables + grid_size;
649 for (uint32_t i = 0; i < a2b->output_channels; ++i) {
650 a2b->output_curves[i].table_entries = output_table_entries;
651 if (byte_width == 1) {
652 a2b->output_curves[i].table_8 = output_table_base + i * byte_len_per_output_table;
653 a2b->output_curves[i].table_16 = nullptr;
654 } else {
655 a2b->output_curves[i].table_8 = nullptr;
656 a2b->output_curves[i].table_16 = output_table_base + i * byte_len_per_output_table;
657 }
658 }
659
660 return true;
661 }
662
read_tag_mft1(const skcms_ICCTag * tag,skcms_A2B * a2b)663 static bool read_tag_mft1(const skcms_ICCTag* tag, skcms_A2B* a2b) {
664 if (tag->size < SAFE_FIXED_SIZE(mft1_Layout)) {
665 return false;
666 }
667
668 const mft1_Layout* mftTag = (const mft1_Layout*)tag->buf;
669 if (!read_mft_common(mftTag->common, a2b)) {
670 return false;
671 }
672
673 uint32_t input_table_entries = 256;
674 uint32_t output_table_entries = 256;
675
676 return init_a2b_tables(mftTag->variable, tag->size - SAFE_FIXED_SIZE(mft1_Layout), 1,
677 input_table_entries, output_table_entries, a2b);
678 }
679
read_tag_mft2(const skcms_ICCTag * tag,skcms_A2B * a2b)680 static bool read_tag_mft2(const skcms_ICCTag* tag, skcms_A2B* a2b) {
681 if (tag->size < SAFE_FIXED_SIZE(mft2_Layout)) {
682 return false;
683 }
684
685 const mft2_Layout* mftTag = (const mft2_Layout*)tag->buf;
686 if (!read_mft_common(mftTag->common, a2b)) {
687 return false;
688 }
689
690 uint32_t input_table_entries = read_big_u16(mftTag->input_table_entries);
691 uint32_t output_table_entries = read_big_u16(mftTag->output_table_entries);
692
693 // ICC spec mandates that 2 <= table_entries <= 4096
694 if (input_table_entries < 2 || input_table_entries > 4096 ||
695 output_table_entries < 2 || output_table_entries > 4096) {
696 return false;
697 }
698
699 return init_a2b_tables(mftTag->variable, tag->size - SAFE_FIXED_SIZE(mft2_Layout), 2,
700 input_table_entries, output_table_entries, a2b);
701 }
702
read_curves(const uint8_t * buf,uint32_t size,uint32_t curve_offset,uint32_t num_curves,skcms_Curve * curves)703 static bool read_curves(const uint8_t* buf, uint32_t size, uint32_t curve_offset,
704 uint32_t num_curves, skcms_Curve* curves) {
705 for (uint32_t i = 0; i < num_curves; ++i) {
706 if (curve_offset > size) {
707 return false;
708 }
709
710 uint32_t curve_bytes;
711 if (!read_curve(buf + curve_offset, size - curve_offset, &curves[i], &curve_bytes)) {
712 return false;
713 }
714
715 if (curve_bytes > UINT32_MAX - 3) {
716 return false;
717 }
718 curve_bytes = (curve_bytes + 3) & ~3U;
719
720 uint64_t new_offset_64 = (uint64_t)curve_offset + curve_bytes;
721 curve_offset = (uint32_t)new_offset_64;
722 if (new_offset_64 != curve_offset) {
723 return false;
724 }
725 }
726
727 return true;
728 }
729
730 typedef struct {
731 uint8_t type [ 4];
732 uint8_t reserved_a [ 4];
733 uint8_t input_channels [ 1];
734 uint8_t output_channels [ 1];
735 uint8_t reserved_b [ 2];
736 uint8_t b_curve_offset [ 4];
737 uint8_t matrix_offset [ 4];
738 uint8_t m_curve_offset [ 4];
739 uint8_t clut_offset [ 4];
740 uint8_t a_curve_offset [ 4];
741 } mAB_Layout;
742
743 typedef struct {
744 uint8_t grid_points [16];
745 uint8_t grid_byte_width [ 1];
746 uint8_t reserved [ 3];
747 uint8_t variable [1/*variable*/];
748 } mABCLUT_Layout;
749
read_tag_mab(const skcms_ICCTag * tag,skcms_A2B * a2b,bool pcs_is_xyz)750 static bool read_tag_mab(const skcms_ICCTag* tag, skcms_A2B* a2b, bool pcs_is_xyz) {
751 if (tag->size < SAFE_SIZEOF(mAB_Layout)) {
752 return false;
753 }
754
755 const mAB_Layout* mABTag = (const mAB_Layout*)tag->buf;
756
757 a2b->input_channels = mABTag->input_channels[0];
758 a2b->output_channels = mABTag->output_channels[0];
759
760 // We require exactly three (ie XYZ/Lab/RGB) output channels
761 if (a2b->output_channels != ARRAY_COUNT(a2b->output_curves)) {
762 return false;
763 }
764 // We require no more than four (ie CMYK) input channels
765 if (a2b->input_channels > ARRAY_COUNT(a2b->input_curves)) {
766 return false;
767 }
768
769 uint32_t b_curve_offset = read_big_u32(mABTag->b_curve_offset);
770 uint32_t matrix_offset = read_big_u32(mABTag->matrix_offset);
771 uint32_t m_curve_offset = read_big_u32(mABTag->m_curve_offset);
772 uint32_t clut_offset = read_big_u32(mABTag->clut_offset);
773 uint32_t a_curve_offset = read_big_u32(mABTag->a_curve_offset);
774
775 // "B" curves must be present
776 if (0 == b_curve_offset) {
777 return false;
778 }
779
780 if (!read_curves(tag->buf, tag->size, b_curve_offset, a2b->output_channels,
781 a2b->output_curves)) {
782 return false;
783 }
784
785 // "M" curves and Matrix must be used together
786 if (0 != m_curve_offset) {
787 if (0 == matrix_offset) {
788 return false;
789 }
790 a2b->matrix_channels = a2b->output_channels;
791 if (!read_curves(tag->buf, tag->size, m_curve_offset, a2b->matrix_channels,
792 a2b->matrix_curves)) {
793 return false;
794 }
795
796 // Read matrix, which is stored as a row-major 3x3, followed by the fourth column
797 if (tag->size < matrix_offset + 12 * SAFE_SIZEOF(uint32_t)) {
798 return false;
799 }
800 float encoding_factor = pcs_is_xyz ? 65535 / 32768.0f : 1.0f;
801 const uint8_t* mtx_buf = tag->buf + matrix_offset;
802 a2b->matrix.vals[0][0] = encoding_factor * read_big_fixed(mtx_buf + 0);
803 a2b->matrix.vals[0][1] = encoding_factor * read_big_fixed(mtx_buf + 4);
804 a2b->matrix.vals[0][2] = encoding_factor * read_big_fixed(mtx_buf + 8);
805 a2b->matrix.vals[1][0] = encoding_factor * read_big_fixed(mtx_buf + 12);
806 a2b->matrix.vals[1][1] = encoding_factor * read_big_fixed(mtx_buf + 16);
807 a2b->matrix.vals[1][2] = encoding_factor * read_big_fixed(mtx_buf + 20);
808 a2b->matrix.vals[2][0] = encoding_factor * read_big_fixed(mtx_buf + 24);
809 a2b->matrix.vals[2][1] = encoding_factor * read_big_fixed(mtx_buf + 28);
810 a2b->matrix.vals[2][2] = encoding_factor * read_big_fixed(mtx_buf + 32);
811 a2b->matrix.vals[0][3] = encoding_factor * read_big_fixed(mtx_buf + 36);
812 a2b->matrix.vals[1][3] = encoding_factor * read_big_fixed(mtx_buf + 40);
813 a2b->matrix.vals[2][3] = encoding_factor * read_big_fixed(mtx_buf + 44);
814 } else {
815 if (0 != matrix_offset) {
816 return false;
817 }
818 a2b->matrix_channels = 0;
819 }
820
821 // "A" curves and CLUT must be used together
822 if (0 != a_curve_offset) {
823 if (0 == clut_offset) {
824 return false;
825 }
826 if (!read_curves(tag->buf, tag->size, a_curve_offset, a2b->input_channels,
827 a2b->input_curves)) {
828 return false;
829 }
830
831 if (tag->size < clut_offset + SAFE_FIXED_SIZE(mABCLUT_Layout)) {
832 return false;
833 }
834 const mABCLUT_Layout* clut = (const mABCLUT_Layout*)(tag->buf + clut_offset);
835
836 if (clut->grid_byte_width[0] == 1) {
837 a2b->grid_8 = clut->variable;
838 a2b->grid_16 = nullptr;
839 } else if (clut->grid_byte_width[0] == 2) {
840 a2b->grid_8 = nullptr;
841 a2b->grid_16 = clut->variable;
842 } else {
843 return false;
844 }
845
846 uint64_t grid_size = a2b->output_channels * clut->grid_byte_width[0];
847 for (uint32_t i = 0; i < a2b->input_channels; ++i) {
848 a2b->grid_points[i] = clut->grid_points[i];
849 // The grid only makes sense with at least two points along each axis
850 if (a2b->grid_points[i] < 2) {
851 return false;
852 }
853 grid_size *= a2b->grid_points[i];
854 }
855 if (tag->size < clut_offset + SAFE_FIXED_SIZE(mABCLUT_Layout) + grid_size) {
856 return false;
857 }
858 } else {
859 if (0 != clut_offset) {
860 return false;
861 }
862
863 // If there is no CLUT, the number of input and output channels must match
864 if (a2b->input_channels != a2b->output_channels) {
865 return false;
866 }
867
868 // Zero out the number of input channels to signal that we're skipping this stage
869 a2b->input_channels = 0;
870 }
871
872 return true;
873 }
874
875 // If you pass f, we'll fit a possibly-non-zero value for *f.
876 // If you pass nullptr, we'll assume you want *f to be treated as zero.
fit_linear(const skcms_Curve * curve,int N,float tol,float * c,float * d,float * f=nullptr)877 static int fit_linear(const skcms_Curve* curve, int N, float tol,
878 float* c, float* d, float* f = nullptr) {
879 assert(N > 1);
880 // We iteratively fit the first points to the TF's linear piece.
881 // We want the cx + f line to pass through the first and last points we fit exactly.
882 //
883 // As we walk along the points we find the minimum and maximum slope of the line before the
884 // error would exceed our tolerance. We stop when the range [slope_min, slope_max] becomes
885 // emtpy, when we definitely can't add any more points.
886 //
887 // Some points' error intervals may intersect the running interval but not lie fully
888 // within it. So we keep track of the last point we saw that is a valid end point candidate,
889 // and once the search is done, back up to build the line through *that* point.
890 const float dx = 1.0f / (N - 1);
891
892 int lin_points = 1;
893
894 float f_zero = 0.0f;
895 if (f) {
896 *f = eval_curve(curve, 0);
897 } else {
898 f = &f_zero;
899 }
900
901
902 float slope_min = -INFINITY_;
903 float slope_max = +INFINITY_;
904 for (int i = 1; i < N; ++i) {
905 float x = i * dx;
906 float y = eval_curve(curve, x);
907
908 float slope_max_i = (y + tol - *f) / x,
909 slope_min_i = (y - tol - *f) / x;
910 if (slope_max_i < slope_min || slope_max < slope_min_i) {
911 // Slope intervals would no longer overlap.
912 break;
913 }
914 slope_max = fminf_(slope_max, slope_max_i);
915 slope_min = fmaxf_(slope_min, slope_min_i);
916
917 float cur_slope = (y - *f) / x;
918 if (slope_min <= cur_slope && cur_slope <= slope_max) {
919 lin_points = i + 1;
920 *c = cur_slope;
921 }
922 }
923
924 // Set D to the last point that met our tolerance.
925 *d = (lin_points - 1) * dx;
926 return lin_points;
927 }
928
read_a2b(const skcms_ICCTag * tag,skcms_A2B * a2b,bool pcs_is_xyz)929 static bool read_a2b(const skcms_ICCTag* tag, skcms_A2B* a2b, bool pcs_is_xyz) {
930 bool ok = false;
931 if (tag->type == skcms_Signature_mft1) {
932 ok = read_tag_mft1(tag, a2b);
933 } else if (tag->type == skcms_Signature_mft2) {
934 ok = read_tag_mft2(tag, a2b);
935 } else if (tag->type == skcms_Signature_mAB) {
936 ok = read_tag_mab(tag, a2b, pcs_is_xyz);
937 }
938 if (!ok) {
939 return false;
940 }
941
942 // Detect and canonicalize identity tables.
943 skcms_Curve* curves[] = {
944 a2b->input_channels > 0 ? a2b->input_curves + 0 : nullptr,
945 a2b->input_channels > 1 ? a2b->input_curves + 1 : nullptr,
946 a2b->input_channels > 2 ? a2b->input_curves + 2 : nullptr,
947 a2b->input_channels > 3 ? a2b->input_curves + 3 : nullptr,
948 a2b->matrix_channels > 0 ? a2b->matrix_curves + 0 : nullptr,
949 a2b->matrix_channels > 1 ? a2b->matrix_curves + 1 : nullptr,
950 a2b->matrix_channels > 2 ? a2b->matrix_curves + 2 : nullptr,
951 a2b->output_channels > 0 ? a2b->output_curves + 0 : nullptr,
952 a2b->output_channels > 1 ? a2b->output_curves + 1 : nullptr,
953 a2b->output_channels > 2 ? a2b->output_curves + 2 : nullptr,
954 };
955
956 for (int i = 0; i < ARRAY_COUNT(curves); i++) {
957 skcms_Curve* curve = curves[i];
958
959 if (curve && curve->table_entries && curve->table_entries <= (uint32_t)INT_MAX) {
960 int N = (int)curve->table_entries;
961
962 float c = 0.0f, d = 0.0f, f = 0.0f;
963 if (N == fit_linear(curve, N, 1.0f/(2*N), &c,&d,&f)
964 && c == 1.0f
965 && f == 0.0f) {
966 curve->table_entries = 0;
967 curve->table_8 = nullptr;
968 curve->table_16 = nullptr;
969 curve->parametric = skcms_TransferFunction{1,1,0,0,0,0,0};
970 }
971 }
972 }
973
974 return true;
975 }
976
skcms_GetTagByIndex(const skcms_ICCProfile * profile,uint32_t idx,skcms_ICCTag * tag)977 void skcms_GetTagByIndex(const skcms_ICCProfile* profile, uint32_t idx, skcms_ICCTag* tag) {
978 if (!profile || !profile->buffer || !tag) { return; }
979 if (idx > profile->tag_count) { return; }
980 const tag_Layout* tags = get_tag_table(profile);
981 tag->signature = read_big_u32(tags[idx].signature);
982 tag->size = read_big_u32(tags[idx].size);
983 tag->buf = read_big_u32(tags[idx].offset) + profile->buffer;
984 tag->type = read_big_u32(tag->buf);
985 }
986
skcms_GetTagBySignature(const skcms_ICCProfile * profile,uint32_t sig,skcms_ICCTag * tag)987 bool skcms_GetTagBySignature(const skcms_ICCProfile* profile, uint32_t sig, skcms_ICCTag* tag) {
988 if (!profile || !profile->buffer || !tag) { return false; }
989 const tag_Layout* tags = get_tag_table(profile);
990 for (uint32_t i = 0; i < profile->tag_count; ++i) {
991 if (read_big_u32(tags[i].signature) == sig) {
992 tag->signature = sig;
993 tag->size = read_big_u32(tags[i].size);
994 tag->buf = read_big_u32(tags[i].offset) + profile->buffer;
995 tag->type = read_big_u32(tag->buf);
996 return true;
997 }
998 }
999 return false;
1000 }
1001
usable_as_src(const skcms_ICCProfile * profile)1002 static bool usable_as_src(const skcms_ICCProfile* profile) {
1003 return profile->has_A2B
1004 || (profile->has_trc && profile->has_toXYZD50);
1005 }
1006
skcms_Parse(const void * buf,size_t len,skcms_ICCProfile * profile)1007 bool skcms_Parse(const void* buf, size_t len, skcms_ICCProfile* profile) {
1008 assert(SAFE_SIZEOF(header_Layout) == 132);
1009
1010 if (!profile) {
1011 return false;
1012 }
1013 memset(profile, 0, SAFE_SIZEOF(*profile));
1014
1015 if (len < SAFE_SIZEOF(header_Layout)) {
1016 return false;
1017 }
1018
1019 // Byte-swap all header fields
1020 const header_Layout* header = (const header_Layout*)buf;
1021 profile->buffer = (const uint8_t*)buf;
1022 profile->size = read_big_u32(header->size);
1023 uint32_t version = read_big_u32(header->version);
1024 profile->data_color_space = read_big_u32(header->data_color_space);
1025 profile->pcs = read_big_u32(header->pcs);
1026 uint32_t signature = read_big_u32(header->signature);
1027 float illuminant_X = read_big_fixed(header->illuminant_X);
1028 float illuminant_Y = read_big_fixed(header->illuminant_Y);
1029 float illuminant_Z = read_big_fixed(header->illuminant_Z);
1030 profile->tag_count = read_big_u32(header->tag_count);
1031
1032 // Validate signature, size (smaller than buffer, large enough to hold tag table),
1033 // and major version
1034 uint64_t tag_table_size = profile->tag_count * SAFE_SIZEOF(tag_Layout);
1035 if (signature != skcms_Signature_acsp ||
1036 profile->size > len ||
1037 profile->size < SAFE_SIZEOF(header_Layout) + tag_table_size ||
1038 (version >> 24) > 4) {
1039 return false;
1040 }
1041
1042 // Validate that illuminant is D50 white
1043 if (fabsf_(illuminant_X - 0.9642f) > 0.0100f ||
1044 fabsf_(illuminant_Y - 1.0000f) > 0.0100f ||
1045 fabsf_(illuminant_Z - 0.8249f) > 0.0100f) {
1046 return false;
1047 }
1048
1049 // Validate that all tag entries have sane offset + size
1050 const tag_Layout* tags = get_tag_table(profile);
1051 for (uint32_t i = 0; i < profile->tag_count; ++i) {
1052 uint32_t tag_offset = read_big_u32(tags[i].offset);
1053 uint32_t tag_size = read_big_u32(tags[i].size);
1054 uint64_t tag_end = (uint64_t)tag_offset + (uint64_t)tag_size;
1055 if (tag_size < 4 || tag_end > profile->size) {
1056 return false;
1057 }
1058 }
1059
1060 if (profile->pcs != skcms_Signature_XYZ && profile->pcs != skcms_Signature_Lab) {
1061 return false;
1062 }
1063
1064 bool pcs_is_xyz = profile->pcs == skcms_Signature_XYZ;
1065
1066 // Pre-parse commonly used tags.
1067 skcms_ICCTag kTRC;
1068 if (profile->data_color_space == skcms_Signature_Gray &&
1069 skcms_GetTagBySignature(profile, skcms_Signature_kTRC, &kTRC)) {
1070 if (!read_curve(kTRC.buf, kTRC.size, &profile->trc[0], nullptr)) {
1071 // Malformed tag
1072 return false;
1073 }
1074 profile->trc[1] = profile->trc[0];
1075 profile->trc[2] = profile->trc[0];
1076 profile->has_trc = true;
1077
1078 if (pcs_is_xyz) {
1079 profile->toXYZD50.vals[0][0] = illuminant_X;
1080 profile->toXYZD50.vals[1][1] = illuminant_Y;
1081 profile->toXYZD50.vals[2][2] = illuminant_Z;
1082 profile->has_toXYZD50 = true;
1083 }
1084 } else {
1085 skcms_ICCTag rTRC, gTRC, bTRC;
1086 if (skcms_GetTagBySignature(profile, skcms_Signature_rTRC, &rTRC) &&
1087 skcms_GetTagBySignature(profile, skcms_Signature_gTRC, &gTRC) &&
1088 skcms_GetTagBySignature(profile, skcms_Signature_bTRC, &bTRC)) {
1089 if (!read_curve(rTRC.buf, rTRC.size, &profile->trc[0], nullptr) ||
1090 !read_curve(gTRC.buf, gTRC.size, &profile->trc[1], nullptr) ||
1091 !read_curve(bTRC.buf, bTRC.size, &profile->trc[2], nullptr)) {
1092 // Malformed TRC tags
1093 return false;
1094 }
1095 profile->has_trc = true;
1096 }
1097
1098 skcms_ICCTag rXYZ, gXYZ, bXYZ;
1099 if (skcms_GetTagBySignature(profile, skcms_Signature_rXYZ, &rXYZ) &&
1100 skcms_GetTagBySignature(profile, skcms_Signature_gXYZ, &gXYZ) &&
1101 skcms_GetTagBySignature(profile, skcms_Signature_bXYZ, &bXYZ)) {
1102 if (!read_to_XYZD50(&rXYZ, &gXYZ, &bXYZ, &profile->toXYZD50)) {
1103 // Malformed XYZ tags
1104 return false;
1105 }
1106 profile->has_toXYZD50 = true;
1107 }
1108 }
1109
1110 skcms_ICCTag a2b_tag;
1111
1112 // For now, we're preferring A2B0, like Skia does and the ICC spec tells us to.
1113 // TODO: prefer A2B1 (relative colormetric) over A2B0 (perceptual)?
1114 // This breaks with the ICC spec, but we think it's a good idea, given that TRC curves
1115 // and all our known users are thinking exclusively in terms of relative colormetric.
1116 const uint32_t sigs[] = { skcms_Signature_A2B0, skcms_Signature_A2B1 };
1117 for (int i = 0; i < ARRAY_COUNT(sigs); i++) {
1118 if (skcms_GetTagBySignature(profile, sigs[i], &a2b_tag)) {
1119 if (!read_a2b(&a2b_tag, &profile->A2B, pcs_is_xyz)) {
1120 // Malformed A2B tag
1121 return false;
1122 }
1123 profile->has_A2B = true;
1124 break;
1125 }
1126 }
1127
1128 return usable_as_src(profile);
1129 }
1130
1131
skcms_sRGB_profile()1132 const skcms_ICCProfile* skcms_sRGB_profile() {
1133 static const skcms_ICCProfile sRGB_profile = {
1134 nullptr, // buffer, moot here
1135
1136 0, // size, moot here
1137 skcms_Signature_RGB, // data_color_space
1138 skcms_Signature_XYZ, // pcs
1139 0, // tag count, moot here
1140
1141 // We choose to represent sRGB with its canonical transfer function,
1142 // and with its canonical XYZD50 gamut matrix.
1143 true, // has_trc, followed by the 3 trc curves
1144 {
1145 {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
1146 {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
1147 {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
1148 },
1149
1150 true, // has_toXYZD50, followed by 3x3 toXYZD50 matrix
1151 {{
1152 { 0.436065674f, 0.385147095f, 0.143066406f },
1153 { 0.222488403f, 0.716873169f, 0.060607910f },
1154 { 0.013916016f, 0.097076416f, 0.714096069f },
1155 }},
1156
1157 false, // has_A2B, followed by a2b itself which we don't care about.
1158 {
1159 0,
1160 {
1161 {{0, {0,0, 0,0,0,0,0}}},
1162 {{0, {0,0, 0,0,0,0,0}}},
1163 {{0, {0,0, 0,0,0,0,0}}},
1164 {{0, {0,0, 0,0,0,0,0}}},
1165 },
1166 {0,0,0,0},
1167 nullptr,
1168 nullptr,
1169
1170 0,
1171 {
1172 {{0, {0,0, 0,0,0,0,0}}},
1173 {{0, {0,0, 0,0,0,0,0}}},
1174 {{0, {0,0, 0,0,0,0,0}}},
1175 },
1176 {{
1177 { 0,0,0,0 },
1178 { 0,0,0,0 },
1179 { 0,0,0,0 },
1180 }},
1181
1182 0,
1183 {
1184 {{0, {0,0, 0,0,0,0,0}}},
1185 {{0, {0,0, 0,0,0,0,0}}},
1186 {{0, {0,0, 0,0,0,0,0}}},
1187 },
1188 },
1189 };
1190 return &sRGB_profile;
1191 }
1192
skcms_XYZD50_profile()1193 const skcms_ICCProfile* skcms_XYZD50_profile() {
1194 // Just like sRGB above, but with identity transfer functions and toXYZD50 matrix.
1195 static const skcms_ICCProfile XYZD50_profile = {
1196 nullptr, // buffer, moot here
1197
1198 0, // size, moot here
1199 skcms_Signature_RGB, // data_color_space
1200 skcms_Signature_XYZ, // pcs
1201 0, // tag count, moot here
1202
1203 true, // has_trc, followed by the 3 trc curves
1204 {
1205 {{0, {1,1, 0,0,0,0,0}}},
1206 {{0, {1,1, 0,0,0,0,0}}},
1207 {{0, {1,1, 0,0,0,0,0}}},
1208 },
1209
1210 true, // has_toXYZD50, followed by 3x3 toXYZD50 matrix
1211 {{
1212 { 1,0,0 },
1213 { 0,1,0 },
1214 { 0,0,1 },
1215 }},
1216
1217 false, // has_A2B, followed by a2b itself which we don't care about.
1218 {
1219 0,
1220 {
1221 {{0, {0,0, 0,0,0,0,0}}},
1222 {{0, {0,0, 0,0,0,0,0}}},
1223 {{0, {0,0, 0,0,0,0,0}}},
1224 {{0, {0,0, 0,0,0,0,0}}},
1225 },
1226 {0,0,0,0},
1227 nullptr,
1228 nullptr,
1229
1230 0,
1231 {
1232 {{0, {0,0, 0,0,0,0,0}}},
1233 {{0, {0,0, 0,0,0,0,0}}},
1234 {{0, {0,0, 0,0,0,0,0}}},
1235 },
1236 {{
1237 { 0,0,0,0 },
1238 { 0,0,0,0 },
1239 { 0,0,0,0 },
1240 }},
1241
1242 0,
1243 {
1244 {{0, {0,0, 0,0,0,0,0}}},
1245 {{0, {0,0, 0,0,0,0,0}}},
1246 {{0, {0,0, 0,0,0,0,0}}},
1247 },
1248 },
1249 };
1250
1251 return &XYZD50_profile;
1252 }
1253
skcms_sRGB_TransferFunction()1254 const skcms_TransferFunction* skcms_sRGB_TransferFunction() {
1255 return &skcms_sRGB_profile()->trc[0].parametric;
1256 }
1257
skcms_sRGB_Inverse_TransferFunction()1258 const skcms_TransferFunction* skcms_sRGB_Inverse_TransferFunction() {
1259 static const skcms_TransferFunction sRGB_inv =
1260 {0.416666657f, 1.137283325f, -0.0f, 12.920000076f, 0.003130805f, -0.054969788f, -0.0f};
1261 return &sRGB_inv;
1262 }
1263
skcms_Identity_TransferFunction()1264 const skcms_TransferFunction* skcms_Identity_TransferFunction() {
1265 static const skcms_TransferFunction identity = {1,1,0,0,0,0,0};
1266 return &identity;
1267 }
1268
1269 const uint8_t skcms_252_random_bytes[] = {
1270 8, 179, 128, 204, 253, 38, 134, 184, 68, 102, 32, 138, 99, 39, 169, 215,
1271 119, 26, 3, 223, 95, 239, 52, 132, 114, 74, 81, 234, 97, 116, 244, 205, 30,
1272 154, 173, 12, 51, 159, 122, 153, 61, 226, 236, 178, 229, 55, 181, 220, 191,
1273 194, 160, 126, 168, 82, 131, 18, 180, 245, 163, 22, 246, 69, 235, 252, 57,
1274 108, 14, 6, 152, 240, 255, 171, 242, 20, 227, 177, 238, 96, 85, 16, 211,
1275 70, 200, 149, 155, 146, 127, 145, 100, 151, 109, 19, 165, 208, 195, 164,
1276 137, 254, 182, 248, 64, 201, 45, 209, 5, 147, 207, 210, 113, 162, 83, 225,
1277 9, 31, 15, 231, 115, 37, 58, 53, 24, 49, 197, 56, 120, 172, 48, 21, 214,
1278 129, 111, 11, 50, 187, 196, 34, 60, 103, 71, 144, 47, 203, 77, 80, 232,
1279 140, 222, 250, 206, 166, 247, 139, 249, 221, 72, 106, 27, 199, 117, 54,
1280 219, 135, 118, 40, 79, 41, 251, 46, 93, 212, 92, 233, 148, 28, 121, 63,
1281 123, 158, 105, 59, 29, 42, 143, 23, 0, 107, 176, 87, 104, 183, 156, 193,
1282 189, 90, 188, 65, 190, 17, 198, 7, 186, 161, 1, 124, 78, 125, 170, 133,
1283 174, 218, 67, 157, 75, 101, 89, 217, 62, 33, 141, 228, 25, 35, 91, 230, 4,
1284 2, 13, 73, 86, 167, 237, 84, 243, 44, 185, 66, 130, 110, 150, 142, 216, 88,
1285 112, 36, 224, 136, 202, 76, 94, 98, 175, 213
1286 };
1287
skcms_ApproximatelyEqualProfiles(const skcms_ICCProfile * A,const skcms_ICCProfile * B)1288 bool skcms_ApproximatelyEqualProfiles(const skcms_ICCProfile* A, const skcms_ICCProfile* B) {
1289 // Test for exactly equal profiles first.
1290 if (A == B || 0 == memcmp(A,B, sizeof(skcms_ICCProfile))) {
1291 return true;
1292 }
1293
1294 // For now this is the essentially the same strategy we use in test_only.c
1295 // for our skcms_Transform() smoke tests:
1296 // 1) transform A to XYZD50
1297 // 2) transform B to XYZD50
1298 // 3) return true if they're similar enough
1299 // Our current criterion in 3) is maximum 1 bit error per XYZD50 byte.
1300
1301 // skcms_252_random_bytes are 252 of a random shuffle of all possible bytes.
1302 // 252 is evenly divisible by 3 and 4. Only 192, 10, 241, and 43 are missing.
1303
1304 if (A->data_color_space != B->data_color_space) {
1305 return false;
1306 }
1307
1308 // Interpret as RGB_888 if data color space is RGB or GRAY, RGBA_8888 if CMYK.
1309 // TODO: working with RGBA_8888 either way is probably fastest.
1310 skcms_PixelFormat fmt = skcms_PixelFormat_RGB_888;
1311 size_t npixels = 84;
1312 if (A->data_color_space == skcms_Signature_CMYK) {
1313 fmt = skcms_PixelFormat_RGBA_8888;
1314 npixels = 63;
1315 }
1316
1317 // TODO: if A or B is a known profile (skcms_sRGB_profile, skcms_XYZD50_profile),
1318 // use pre-canned results and skip that skcms_Transform() call?
1319 uint8_t dstA[252],
1320 dstB[252];
1321 if (!skcms_Transform(
1322 skcms_252_random_bytes, fmt, skcms_AlphaFormat_Unpremul, A,
1323 dstA, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, skcms_XYZD50_profile(),
1324 npixels)) {
1325 return false;
1326 }
1327 if (!skcms_Transform(
1328 skcms_252_random_bytes, fmt, skcms_AlphaFormat_Unpremul, B,
1329 dstB, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, skcms_XYZD50_profile(),
1330 npixels)) {
1331 return false;
1332 }
1333
1334 // TODO: make sure this final check has reasonable codegen.
1335 for (size_t i = 0; i < 252; i++) {
1336 if (abs((int)dstA[i] - (int)dstB[i]) > 1) {
1337 return false;
1338 }
1339 }
1340 return true;
1341 }
1342
skcms_TRCs_AreApproximateInverse(const skcms_ICCProfile * profile,const skcms_TransferFunction * inv_tf)1343 bool skcms_TRCs_AreApproximateInverse(const skcms_ICCProfile* profile,
1344 const skcms_TransferFunction* inv_tf) {
1345 if (!profile || !profile->has_trc) {
1346 return false;
1347 }
1348
1349 return skcms_AreApproximateInverses(&profile->trc[0], inv_tf) &&
1350 skcms_AreApproximateInverses(&profile->trc[1], inv_tf) &&
1351 skcms_AreApproximateInverses(&profile->trc[2], inv_tf);
1352 }
1353
is_zero_to_one(float x)1354 static bool is_zero_to_one(float x) {
1355 return 0 <= x && x <= 1;
1356 }
1357
1358 typedef struct { float vals[3]; } skcms_Vector3;
1359
mv_mul(const skcms_Matrix3x3 * m,const skcms_Vector3 * v)1360 static skcms_Vector3 mv_mul(const skcms_Matrix3x3* m, const skcms_Vector3* v) {
1361 skcms_Vector3 dst = {{0,0,0}};
1362 for (int row = 0; row < 3; ++row) {
1363 dst.vals[row] = m->vals[row][0] * v->vals[0]
1364 + m->vals[row][1] * v->vals[1]
1365 + m->vals[row][2] * v->vals[2];
1366 }
1367 return dst;
1368 }
1369
skcms_PrimariesToXYZD50(float rx,float ry,float gx,float gy,float bx,float by,float wx,float wy,skcms_Matrix3x3 * toXYZD50)1370 bool skcms_PrimariesToXYZD50(float rx, float ry,
1371 float gx, float gy,
1372 float bx, float by,
1373 float wx, float wy,
1374 skcms_Matrix3x3* toXYZD50) {
1375 if (!is_zero_to_one(rx) || !is_zero_to_one(ry) ||
1376 !is_zero_to_one(gx) || !is_zero_to_one(gy) ||
1377 !is_zero_to_one(bx) || !is_zero_to_one(by) ||
1378 !is_zero_to_one(wx) || !is_zero_to_one(wy) ||
1379 !toXYZD50) {
1380 return false;
1381 }
1382
1383 // First, we need to convert xy values (primaries) to XYZ.
1384 skcms_Matrix3x3 primaries = {{
1385 { rx, gx, bx },
1386 { ry, gy, by },
1387 { 1 - rx - ry, 1 - gx - gy, 1 - bx - by },
1388 }};
1389 skcms_Matrix3x3 primaries_inv;
1390 if (!skcms_Matrix3x3_invert(&primaries, &primaries_inv)) {
1391 return false;
1392 }
1393
1394 // Assumes that Y is 1.0f.
1395 skcms_Vector3 wXYZ = { { wx / wy, 1, (1 - wx - wy) / wy } };
1396 skcms_Vector3 XYZ = mv_mul(&primaries_inv, &wXYZ);
1397
1398 skcms_Matrix3x3 toXYZ = {{
1399 { XYZ.vals[0], 0, 0 },
1400 { 0, XYZ.vals[1], 0 },
1401 { 0, 0, XYZ.vals[2] },
1402 }};
1403 toXYZ = skcms_Matrix3x3_concat(&primaries, &toXYZ);
1404
1405 // Now convert toXYZ matrix to toXYZD50.
1406 skcms_Vector3 wXYZD50 = { { 0.96422f, 1.0f, 0.82521f } };
1407
1408 // Calculate the chromatic adaptation matrix. We will use the Bradford method, thus
1409 // the matrices below. The Bradford method is used by Adobe and is widely considered
1410 // to be the best.
1411 skcms_Matrix3x3 xyz_to_lms = {{
1412 { 0.8951f, 0.2664f, -0.1614f },
1413 { -0.7502f, 1.7135f, 0.0367f },
1414 { 0.0389f, -0.0685f, 1.0296f },
1415 }};
1416 skcms_Matrix3x3 lms_to_xyz = {{
1417 { 0.9869929f, -0.1470543f, 0.1599627f },
1418 { 0.4323053f, 0.5183603f, 0.0492912f },
1419 { -0.0085287f, 0.0400428f, 0.9684867f },
1420 }};
1421
1422 skcms_Vector3 srcCone = mv_mul(&xyz_to_lms, &wXYZ);
1423 skcms_Vector3 dstCone = mv_mul(&xyz_to_lms, &wXYZD50);
1424
1425 skcms_Matrix3x3 DXtoD50 = {{
1426 { dstCone.vals[0] / srcCone.vals[0], 0, 0 },
1427 { 0, dstCone.vals[1] / srcCone.vals[1], 0 },
1428 { 0, 0, dstCone.vals[2] / srcCone.vals[2] },
1429 }};
1430 DXtoD50 = skcms_Matrix3x3_concat(&DXtoD50, &xyz_to_lms);
1431 DXtoD50 = skcms_Matrix3x3_concat(&lms_to_xyz, &DXtoD50);
1432
1433 *toXYZD50 = skcms_Matrix3x3_concat(&DXtoD50, &toXYZ);
1434 return true;
1435 }
1436
1437
skcms_Matrix3x3_invert(const skcms_Matrix3x3 * src,skcms_Matrix3x3 * dst)1438 bool skcms_Matrix3x3_invert(const skcms_Matrix3x3* src, skcms_Matrix3x3* dst) {
1439 double a00 = src->vals[0][0],
1440 a01 = src->vals[1][0],
1441 a02 = src->vals[2][0],
1442 a10 = src->vals[0][1],
1443 a11 = src->vals[1][1],
1444 a12 = src->vals[2][1],
1445 a20 = src->vals[0][2],
1446 a21 = src->vals[1][2],
1447 a22 = src->vals[2][2];
1448
1449 double b0 = a00*a11 - a01*a10,
1450 b1 = a00*a12 - a02*a10,
1451 b2 = a01*a12 - a02*a11,
1452 b3 = a20,
1453 b4 = a21,
1454 b5 = a22;
1455
1456 double determinant = b0*b5
1457 - b1*b4
1458 + b2*b3;
1459
1460 if (determinant == 0) {
1461 return false;
1462 }
1463
1464 double invdet = 1.0 / determinant;
1465 if (invdet > +FLT_MAX || invdet < -FLT_MAX || !isfinitef_((float)invdet)) {
1466 return false;
1467 }
1468
1469 b0 *= invdet;
1470 b1 *= invdet;
1471 b2 *= invdet;
1472 b3 *= invdet;
1473 b4 *= invdet;
1474 b5 *= invdet;
1475
1476 dst->vals[0][0] = (float)( a11*b5 - a12*b4 );
1477 dst->vals[1][0] = (float)( a02*b4 - a01*b5 );
1478 dst->vals[2][0] = (float)( + b2 );
1479 dst->vals[0][1] = (float)( a12*b3 - a10*b5 );
1480 dst->vals[1][1] = (float)( a00*b5 - a02*b3 );
1481 dst->vals[2][1] = (float)( - b1 );
1482 dst->vals[0][2] = (float)( a10*b4 - a11*b3 );
1483 dst->vals[1][2] = (float)( a01*b3 - a00*b4 );
1484 dst->vals[2][2] = (float)( + b0 );
1485
1486 for (int r = 0; r < 3; ++r)
1487 for (int c = 0; c < 3; ++c) {
1488 if (!isfinitef_(dst->vals[r][c])) {
1489 return false;
1490 }
1491 }
1492 return true;
1493 }
1494
skcms_Matrix3x3_concat(const skcms_Matrix3x3 * A,const skcms_Matrix3x3 * B)1495 skcms_Matrix3x3 skcms_Matrix3x3_concat(const skcms_Matrix3x3* A, const skcms_Matrix3x3* B) {
1496 skcms_Matrix3x3 m = { { { 0,0,0 },{ 0,0,0 },{ 0,0,0 } } };
1497 for (int r = 0; r < 3; r++)
1498 for (int c = 0; c < 3; c++) {
1499 m.vals[r][c] = A->vals[r][0] * B->vals[0][c]
1500 + A->vals[r][1] * B->vals[1][c]
1501 + A->vals[r][2] * B->vals[2][c];
1502 }
1503 return m;
1504 }
1505
1506 #if defined(__clang__)
1507 [[clang::no_sanitize("float-divide-by-zero")]] // Checked for by classify() on the way out.
1508 #endif
skcms_TransferFunction_invert(const skcms_TransferFunction * src,skcms_TransferFunction * dst)1509 bool skcms_TransferFunction_invert(const skcms_TransferFunction* src, skcms_TransferFunction* dst) {
1510 TF_PQish pq;
1511 TF_HLGish hlg;
1512 switch (classify(*src, &pq, &hlg)) {
1513 case Bad: return false;
1514 case sRGBish: break; // handled below
1515
1516 case PQish:
1517 *dst = { TFKind_marker(PQish), -pq.A, pq.D, 1.0f/pq.F
1518 , pq.B, -pq.E, 1.0f/pq.C};
1519 return true;
1520
1521 case HLGish:
1522 *dst = { TFKind_marker(HLGinvish), 1.0f/hlg.R, 1.0f/hlg.G
1523 , 1.0f/hlg.a, hlg.b, hlg.c, 0 };
1524 return true;
1525
1526 case HLGinvish:
1527 *dst = { TFKind_marker(HLGish), 1.0f/hlg.R, 1.0f/hlg.G
1528 , 1.0f/hlg.a, hlg.b, hlg.c, 0 };
1529 return true;
1530 }
1531
1532 assert (classify(*src) == sRGBish);
1533
1534 // We're inverting this function, solving for x in terms of y.
1535 // y = (cx + f) x < d
1536 // (ax + b)^g + e x ≥ d
1537 // The inverse of this function can be expressed in the same piecewise form.
1538 skcms_TransferFunction inv = {0,0,0,0,0,0,0};
1539
1540 // We'll start by finding the new threshold inv.d.
1541 // In principle we should be able to find that by solving for y at x=d from either side.
1542 // (If those two d values aren't the same, it's a discontinuous transfer function.)
1543 float d_l = src->c * src->d + src->f,
1544 d_r = powf_(src->a * src->d + src->b, src->g) + src->e;
1545 if (fabsf_(d_l - d_r) > 1/512.0f) {
1546 return false;
1547 }
1548 inv.d = d_l; // TODO(mtklein): better in practice to choose d_r?
1549
1550 // When d=0, the linear section collapses to a point. We leave c,d,f all zero in that case.
1551 if (inv.d > 0) {
1552 // Inverting the linear section is pretty straightfoward:
1553 // y = cx + f
1554 // y - f = cx
1555 // (1/c)y - f/c = x
1556 inv.c = 1.0f/src->c;
1557 inv.f = -src->f/src->c;
1558 }
1559
1560 // The interesting part is inverting the nonlinear section:
1561 // y = (ax + b)^g + e.
1562 // y - e = (ax + b)^g
1563 // (y - e)^1/g = ax + b
1564 // (y - e)^1/g - b = ax
1565 // (1/a)(y - e)^1/g - b/a = x
1566 //
1567 // To make that fit our form, we need to move the (1/a) term inside the exponentiation:
1568 // let k = (1/a)^g
1569 // (1/a)( y - e)^1/g - b/a = x
1570 // (ky - ke)^1/g - b/a = x
1571
1572 float k = powf_(src->a, -src->g); // (1/a)^g == a^-g
1573 inv.g = 1.0f / src->g;
1574 inv.a = k;
1575 inv.b = -k * src->e;
1576 inv.e = -src->b / src->a;
1577
1578 // We need to enforce the same constraints here that we do when fitting a curve,
1579 // a >= 0 and ad+b >= 0. These constraints are checked by classify(), so they're true
1580 // of the source function if we're here.
1581
1582 // Just like when fitting the curve, there's really no way to rescue a < 0.
1583 if (inv.a < 0) {
1584 return false;
1585 }
1586 // On the other hand we can rescue an ad+b that's gone slightly negative here.
1587 if (inv.a * inv.d + inv.b < 0) {
1588 inv.b = -inv.a * inv.d;
1589 }
1590
1591 // That should usually make classify(inv) == sRGBish true, but there are a couple situations
1592 // where we might still fail here, like non-finite parameter values.
1593 if (classify(inv) != sRGBish) {
1594 return false;
1595 }
1596
1597 assert (inv.a >= 0);
1598 assert (inv.a * inv.d + inv.b >= 0);
1599
1600 // Now in principle we're done.
1601 // But to preserve the valuable invariant inv(src(1.0f)) == 1.0f, we'll tweak
1602 // e or f of the inverse, depending on which segment contains src(1.0f).
1603 float s = skcms_TransferFunction_eval(src, 1.0f);
1604 if (!isfinitef_(s)) {
1605 return false;
1606 }
1607
1608 float sign = s < 0 ? -1.0f : 1.0f;
1609 s *= sign;
1610 if (s < inv.d) {
1611 inv.f = 1.0f - sign * inv.c * s;
1612 } else {
1613 inv.e = 1.0f - sign * powf_(inv.a * s + inv.b, inv.g);
1614 }
1615
1616 *dst = inv;
1617 return classify(*dst) == sRGBish;
1618 }
1619
1620 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ //
1621
1622 // From here below we're approximating an skcms_Curve with an skcms_TransferFunction{g,a,b,c,d,e,f}:
1623 //
1624 // tf(x) = cx + f x < d
1625 // tf(x) = (ax + b)^g + e x ≥ d
1626 //
1627 // When fitting, we add the additional constraint that both pieces meet at d:
1628 //
1629 // cd + f = (ad + b)^g + e
1630 //
1631 // Solving for e and folding it through gives an alternate formulation of the non-linear piece:
1632 //
1633 // tf(x) = cx + f x < d
1634 // tf(x) = (ax + b)^g - (ad + b)^g + cd + f x ≥ d
1635 //
1636 // Our overall strategy is then:
1637 // For a couple tolerances,
1638 // - fit_linear(): fit c,d,f iteratively to as many points as our tolerance allows
1639 // - invert c,d,f
1640 // - fit_nonlinear(): fit g,a,b using Gauss-Newton given those inverted c,d,f
1641 // (and by constraint, inverted e) to the inverse of the table.
1642 // Return the parameters with least maximum error.
1643 //
1644 // To run Gauss-Newton to find g,a,b, we'll also need the gradient of the residuals
1645 // of round-trip f_inv(x), the inverse of the non-linear piece of f(x).
1646 //
1647 // let y = Table(x)
1648 // r(x) = x - f_inv(y)
1649 //
1650 // ∂r/∂g = ln(ay + b)*(ay + b)^g
1651 // - ln(ad + b)*(ad + b)^g
1652 // ∂r/∂a = yg(ay + b)^(g-1)
1653 // - dg(ad + b)^(g-1)
1654 // ∂r/∂b = g(ay + b)^(g-1)
1655 // - g(ad + b)^(g-1)
1656
1657 // Return the residual of roundtripping skcms_Curve(x) through f_inv(y) with parameters P,
1658 // and fill out the gradient of the residual into dfdP.
rg_nonlinear(float x,const skcms_Curve * curve,const skcms_TransferFunction * tf,const float P[3],float dfdP[3])1659 static float rg_nonlinear(float x,
1660 const skcms_Curve* curve,
1661 const skcms_TransferFunction* tf,
1662 const float P[3],
1663 float dfdP[3]) {
1664 const float y = eval_curve(curve, x);
1665
1666 const float g = P[0], a = P[1], b = P[2],
1667 c = tf->c, d = tf->d, f = tf->f;
1668
1669 const float Y = fmaxf_(a*y + b, 0.0f),
1670 D = a*d + b;
1671 assert (D >= 0);
1672
1673 // The gradient.
1674 dfdP[0] = logf_(Y)*powf_(Y, g)
1675 - logf_(D)*powf_(D, g);
1676 dfdP[1] = y*g*powf_(Y, g-1)
1677 - d*g*powf_(D, g-1);
1678 dfdP[2] = g*powf_(Y, g-1)
1679 - g*powf_(D, g-1);
1680
1681 // The residual.
1682 const float f_inv = powf_(Y, g)
1683 - powf_(D, g)
1684 + c*d + f;
1685 return x - f_inv;
1686 }
1687
gauss_newton_step(const skcms_Curve * curve,const skcms_TransferFunction * tf,float P[3],float x0,float dx,int N)1688 static bool gauss_newton_step(const skcms_Curve* curve,
1689 const skcms_TransferFunction* tf,
1690 float P[3],
1691 float x0, float dx, int N) {
1692 // We'll sample x from the range [x0,x1] (both inclusive) N times with even spacing.
1693 //
1694 // We want to do P' = P + (Jf^T Jf)^-1 Jf^T r(P),
1695 // where r(P) is the residual vector
1696 // and Jf is the Jacobian matrix of f(), ∂r/∂P.
1697 //
1698 // Let's review the shape of each of these expressions:
1699 // r(P) is [N x 1], a column vector with one entry per value of x tested
1700 // Jf is [N x 3], a matrix with an entry for each (x,P) pair
1701 // Jf^T is [3 x N], the transpose of Jf
1702 //
1703 // Jf^T Jf is [3 x N] * [N x 3] == [3 x 3], a 3x3 matrix,
1704 // and so is its inverse (Jf^T Jf)^-1
1705 // Jf^T r(P) is [3 x N] * [N x 1] == [3 x 1], a column vector with the same shape as P
1706 //
1707 // Our implementation strategy to get to the final ∆P is
1708 // 1) evaluate Jf^T Jf, call that lhs
1709 // 2) evaluate Jf^T r(P), call that rhs
1710 // 3) invert lhs
1711 // 4) multiply inverse lhs by rhs
1712 //
1713 // This is a friendly implementation strategy because we don't have to have any
1714 // buffers that scale with N, and equally nice don't have to perform any matrix
1715 // operations that are variable size.
1716 //
1717 // Other implementation strategies could trade this off, e.g. evaluating the
1718 // pseudoinverse of Jf ( (Jf^T Jf)^-1 Jf^T ) directly, then multiplying that by
1719 // the residuals. That would probably require implementing singular value
1720 // decomposition, and would create a [3 x N] matrix to be multiplied by the
1721 // [N x 1] residual vector, but on the upside I think that'd eliminate the
1722 // possibility of this gauss_newton_step() function ever failing.
1723
1724 // 0) start off with lhs and rhs safely zeroed.
1725 skcms_Matrix3x3 lhs = {{ {0,0,0}, {0,0,0}, {0,0,0} }};
1726 skcms_Vector3 rhs = { {0,0,0} };
1727
1728 // 1,2) evaluate lhs and evaluate rhs
1729 // We want to evaluate Jf only once, but both lhs and rhs involve Jf^T,
1730 // so we'll have to update lhs and rhs at the same time.
1731 for (int i = 0; i < N; i++) {
1732 float x = x0 + i*dx;
1733
1734 float dfdP[3] = {0,0,0};
1735 float resid = rg_nonlinear(x,curve,tf,P, dfdP);
1736
1737 for (int r = 0; r < 3; r++) {
1738 for (int c = 0; c < 3; c++) {
1739 lhs.vals[r][c] += dfdP[r] * dfdP[c];
1740 }
1741 rhs.vals[r] += dfdP[r] * resid;
1742 }
1743 }
1744
1745 // If any of the 3 P parameters are unused, this matrix will be singular.
1746 // Detect those cases and fix them up to indentity instead, so we can invert.
1747 for (int k = 0; k < 3; k++) {
1748 if (lhs.vals[0][k]==0 && lhs.vals[1][k]==0 && lhs.vals[2][k]==0 &&
1749 lhs.vals[k][0]==0 && lhs.vals[k][1]==0 && lhs.vals[k][2]==0) {
1750 lhs.vals[k][k] = 1;
1751 }
1752 }
1753
1754 // 3) invert lhs
1755 skcms_Matrix3x3 lhs_inv;
1756 if (!skcms_Matrix3x3_invert(&lhs, &lhs_inv)) {
1757 return false;
1758 }
1759
1760 // 4) multiply inverse lhs by rhs
1761 skcms_Vector3 dP = mv_mul(&lhs_inv, &rhs);
1762 P[0] += dP.vals[0];
1763 P[1] += dP.vals[1];
1764 P[2] += dP.vals[2];
1765 return isfinitef_(P[0]) && isfinitef_(P[1]) && isfinitef_(P[2]);
1766 }
1767
1768
1769 // Fit the points in [L,N) to the non-linear piece of tf, or return false if we can't.
fit_nonlinear(const skcms_Curve * curve,int L,int N,skcms_TransferFunction * tf)1770 static bool fit_nonlinear(const skcms_Curve* curve, int L, int N, skcms_TransferFunction* tf) {
1771 float P[3] = { tf->g, tf->a, tf->b };
1772
1773 // No matter where we start, dx should always represent N even steps from 0 to 1.
1774 const float dx = 1.0f / (N-1);
1775
1776 // As far as we can tell, 1 Gauss-Newton step won't converge, and 3 steps is no better than 2.
1777 for (int j = 0; j < 2; j++) {
1778 // These extra constraints a >= 0 and ad+b >= 0 are not modeled in the optimization.
1779 // We don't really know how to fix up a if it goes negative.
1780 if (P[1] < 0) {
1781 return false;
1782 }
1783 // If ad+b goes negative, we feel just barely not uneasy enough to tweak b so ad+b is zero.
1784 if (P[1] * tf->d + P[2] < 0) {
1785 P[2] = -P[1] * tf->d;
1786 }
1787 assert (P[1] >= 0 &&
1788 P[1] * tf->d + P[2] >= 0);
1789
1790 if (!gauss_newton_step(curve, tf,
1791 P,
1792 L*dx, dx, N-L)) {
1793 return false;
1794 }
1795 }
1796
1797 // We need to apply our fixups one last time
1798 if (P[1] < 0) {
1799 return false;
1800 }
1801 if (P[1] * tf->d + P[2] < 0) {
1802 P[2] = -P[1] * tf->d;
1803 }
1804
1805 assert (P[1] >= 0 &&
1806 P[1] * tf->d + P[2] >= 0);
1807
1808 tf->g = P[0];
1809 tf->a = P[1];
1810 tf->b = P[2];
1811 tf->e = tf->c*tf->d + tf->f
1812 - powf_(tf->a*tf->d + tf->b, tf->g);
1813 return true;
1814 }
1815
skcms_ApproximateCurve(const skcms_Curve * curve,skcms_TransferFunction * approx,float * max_error)1816 bool skcms_ApproximateCurve(const skcms_Curve* curve,
1817 skcms_TransferFunction* approx,
1818 float* max_error) {
1819 if (!curve || !approx || !max_error) {
1820 return false;
1821 }
1822
1823 if (curve->table_entries == 0) {
1824 // No point approximating an skcms_TransferFunction with an skcms_TransferFunction!
1825 return false;
1826 }
1827
1828 if (curve->table_entries == 1 || curve->table_entries > (uint32_t)INT_MAX) {
1829 // We need at least two points, and must put some reasonable cap on the maximum number.
1830 return false;
1831 }
1832
1833 int N = (int)curve->table_entries;
1834 const float dx = 1.0f / (N - 1);
1835
1836 *max_error = INFINITY_;
1837 const float kTolerances[] = { 1.5f / 65535.0f, 1.0f / 512.0f };
1838 for (int t = 0; t < ARRAY_COUNT(kTolerances); t++) {
1839 skcms_TransferFunction tf,
1840 tf_inv;
1841
1842 // It's problematic to fit curves with non-zero f, so always force it to zero explicitly.
1843 tf.f = 0.0f;
1844 int L = fit_linear(curve, N, kTolerances[t], &tf.c, &tf.d);
1845
1846 if (L == N) {
1847 // If the entire data set was linear, move the coefficients to the nonlinear portion
1848 // with G == 1. This lets use a canonical representation with d == 0.
1849 tf.g = 1;
1850 tf.a = tf.c;
1851 tf.b = tf.f;
1852 tf.c = tf.d = tf.e = tf.f = 0;
1853 } else if (L == N - 1) {
1854 // Degenerate case with only two points in the nonlinear segment. Solve directly.
1855 tf.g = 1;
1856 tf.a = (eval_curve(curve, (N-1)*dx) -
1857 eval_curve(curve, (N-2)*dx))
1858 / dx;
1859 tf.b = eval_curve(curve, (N-2)*dx)
1860 - tf.a * (N-2)*dx;
1861 tf.e = 0;
1862 } else {
1863 // Start by guessing a gamma-only curve through the midpoint.
1864 int mid = (L + N) / 2;
1865 float mid_x = mid / (N - 1.0f);
1866 float mid_y = eval_curve(curve, mid_x);
1867 tf.g = log2f_(mid_y) / log2f_(mid_x);
1868 tf.a = 1;
1869 tf.b = 0;
1870 tf.e = tf.c*tf.d + tf.f
1871 - powf_(tf.a*tf.d + tf.b, tf.g);
1872
1873
1874 if (!skcms_TransferFunction_invert(&tf, &tf_inv) ||
1875 !fit_nonlinear(curve, L,N, &tf_inv)) {
1876 continue;
1877 }
1878
1879 // We fit tf_inv, so calculate tf to keep in sync.
1880 if (!skcms_TransferFunction_invert(&tf_inv, &tf)) {
1881 continue;
1882 }
1883 }
1884
1885 // We'd better have a sane, sRGB-ish TF by now.
1886 // Other non-Bad TFs would be fine, but we know we've only ever tried to fit sRGBish;
1887 // anything else is just some accident of math and the way we pun tf.g as a type flag.
1888 if (sRGBish != classify(tf)) {
1889 continue;
1890 }
1891
1892 // We find our error by roundtripping the table through tf_inv.
1893 //
1894 // (The most likely use case for this approximation is to be inverted and
1895 // used as the transfer function for a destination color space.)
1896 //
1897 // We've kept tf and tf_inv in sync above, but we can't guarantee that tf is
1898 // invertible, so re-verify that here (and use the new inverse for testing).
1899 if (!skcms_TransferFunction_invert(&tf, &tf_inv)) {
1900 continue;
1901 }
1902
1903 float err = max_roundtrip_error(curve, &tf_inv);
1904 if (*max_error > err) {
1905 *max_error = err;
1906 *approx = tf;
1907 }
1908 }
1909 return isfinitef_(*max_error);
1910 }
1911
1912 // ~~~~ Impl. of skcms_Transform() ~~~~
1913
1914 typedef enum {
1915 Op_load_a8,
1916 Op_load_g8,
1917 Op_load_8888_palette8,
1918 Op_load_4444,
1919 Op_load_565,
1920 Op_load_888,
1921 Op_load_8888,
1922 Op_load_1010102,
1923 Op_load_161616LE,
1924 Op_load_16161616LE,
1925 Op_load_161616BE,
1926 Op_load_16161616BE,
1927 Op_load_hhh,
1928 Op_load_hhhh,
1929 Op_load_fff,
1930 Op_load_ffff,
1931
1932 Op_swap_rb,
1933 Op_clamp,
1934 Op_invert,
1935 Op_force_opaque,
1936 Op_premul,
1937 Op_unpremul,
1938 Op_matrix_3x3,
1939 Op_matrix_3x4,
1940 Op_lab_to_xyz,
1941
1942 Op_tf_r,
1943 Op_tf_g,
1944 Op_tf_b,
1945 Op_tf_a,
1946
1947 Op_pq_r,
1948 Op_pq_g,
1949 Op_pq_b,
1950 Op_pq_a,
1951
1952 Op_hlg_r,
1953 Op_hlg_g,
1954 Op_hlg_b,
1955 Op_hlg_a,
1956
1957 Op_hlginv_r,
1958 Op_hlginv_g,
1959 Op_hlginv_b,
1960 Op_hlginv_a,
1961
1962 Op_table_r,
1963 Op_table_g,
1964 Op_table_b,
1965 Op_table_a,
1966
1967 Op_clut,
1968
1969 Op_store_a8,
1970 Op_store_g8,
1971 Op_store_4444,
1972 Op_store_565,
1973 Op_store_888,
1974 Op_store_8888,
1975 Op_store_1010102,
1976 Op_store_161616LE,
1977 Op_store_16161616LE,
1978 Op_store_161616BE,
1979 Op_store_16161616BE,
1980 Op_store_hhh,
1981 Op_store_hhhh,
1982 Op_store_fff,
1983 Op_store_ffff,
1984 } Op;
1985
1986 #if defined(__clang__)
1987 template <int N, typename T> using Vec = T __attribute__((ext_vector_type(N)));
1988 #elif defined(__GNUC__)
1989 // For some reason GCC accepts this nonsense, but not the more straightforward version,
1990 // template <int N, typename T> using Vec = T __attribute__((vector_size(N*sizeof(T))));
1991 template <int N, typename T>
1992 struct VecHelper { typedef T __attribute__((vector_size(N*sizeof(T)))) V; };
1993
1994 template <int N, typename T> using Vec = typename VecHelper<N,T>::V;
1995 #endif
1996
1997 // First, instantiate our default exec_ops() implementation using the default compiliation target.
1998
1999 namespace baseline {
2000 #if defined(SKCMS_PORTABLE) || !(defined(__clang__) || defined(__GNUC__)) \
2001 || (defined(__EMSCRIPTEN_major__) && !defined(__wasm_simd128__))
2002 #define N 1
2003 template <typename T> using V = T;
2004 using Color = float;
2005 #elif defined(__AVX512F__)
2006 #define N 16
2007 template <typename T> using V = Vec<N,T>;
2008 using Color = float;
2009 #elif defined(__AVX__)
2010 #define N 8
2011 template <typename T> using V = Vec<N,T>;
2012 using Color = float;
2013 #elif defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(SKCMS_OPT_INTO_NEON_FP16)
2014 #define N 8
2015 template <typename T> using V = Vec<N,T>;
2016 using Color = _Float16;
2017 #else
2018 #define N 4
2019 template <typename T> using V = Vec<N,T>;
2020 using Color = float;
2021 #endif
2022
2023 #include "src/Transform_inl.h"
2024 #undef N
2025 }
2026
2027 // Now, instantiate any other versions of run_program() we may want for runtime detection.
2028 #if !defined(SKCMS_PORTABLE) && \
2029 !defined(SKCMS_NO_RUNTIME_CPU_DETECTION) && \
2030 (( defined(__clang__) && __clang_major__ >= 5) || \
2031 (!defined(__clang__) && defined(__GNUC__))) \
2032 && defined(__x86_64__)
2033
2034 #if !defined(__AVX2__)
2035 #if defined(__clang__)
2036 #pragma clang attribute push(__attribute__((target("avx2,f16c"))), apply_to=function)
2037 #elif defined(__GNUC__)
2038 #pragma GCC push_options
2039 #pragma GCC target("avx2,f16c")
2040 #endif
2041
2042 namespace hsw {
2043 #define USING_AVX
2044 #define USING_AVX_F16C
2045 #define USING_AVX2
2046 #define N 8
2047 template <typename T> using V = Vec<N,T>;
2048 using Color = float;
2049
2050 #include "src/Transform_inl.h"
2051
2052 // src/Transform_inl.h will undefine USING_* for us.
2053 #undef N
2054 }
2055
2056 #if defined(__clang__)
2057 #pragma clang attribute pop
2058 #elif defined(__GNUC__)
2059 #pragma GCC pop_options
2060 #endif
2061
2062 #define TEST_FOR_HSW
2063 #endif
2064
2065 #if !defined(__AVX512F__)
2066 #if defined(__clang__)
2067 #pragma clang attribute push(__attribute__((target("avx512f,avx512dq,avx512cd,avx512bw,avx512vl"))), apply_to=function)
2068 #elif defined(__GNUC__)
2069 #pragma GCC push_options
2070 #pragma GCC target("avx512f,avx512dq,avx512cd,avx512bw,avx512vl")
2071 #endif
2072
2073 namespace skx {
2074 #define USING_AVX512F
2075 #define N 16
2076 template <typename T> using V = Vec<N,T>;
2077 using Color = float;
2078
2079 #include "src/Transform_inl.h"
2080
2081 // src/Transform_inl.h will undefine USING_* for us.
2082 #undef N
2083 }
2084
2085 #if defined(__clang__)
2086 #pragma clang attribute pop
2087 #elif defined(__GNUC__)
2088 #pragma GCC pop_options
2089 #endif
2090
2091 #define TEST_FOR_SKX
2092 #endif
2093
2094 #if defined(TEST_FOR_HSW) || defined(TEST_FOR_SKX)
2095 enum class CpuType { None, HSW, SKX };
cpu_type()2096 static CpuType cpu_type() {
2097 static const CpuType type = []{
2098 // See http://www.sandpile.org/x86/cpuid.htm
2099
2100 // First, a basic cpuid(1) lets us check prerequisites for HSW, SKX.
2101 uint32_t eax, ebx, ecx, edx;
2102 __asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
2103 : "0"(1), "2"(0));
2104 if ((edx & (1u<<25)) && // SSE
2105 (edx & (1u<<26)) && // SSE2
2106 (ecx & (1u<< 0)) && // SSE3
2107 (ecx & (1u<< 9)) && // SSSE3
2108 (ecx & (1u<<12)) && // FMA (N.B. not used, avoided even)
2109 (ecx & (1u<<19)) && // SSE4.1
2110 (ecx & (1u<<20)) && // SSE4.2
2111 (ecx & (1u<<26)) && // XSAVE
2112 (ecx & (1u<<27)) && // OSXSAVE
2113 (ecx & (1u<<28)) && // AVX
2114 (ecx & (1u<<29))) { // F16C
2115
2116 // Call cpuid(7) to check for AVX2 and AVX-512 bits.
2117 __asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
2118 : "0"(7), "2"(0));
2119 // eax from xgetbv(0) will tell us whether XMM, YMM, and ZMM state is saved.
2120 uint32_t xcr0, dont_need_edx;
2121 __asm__ __volatile__("xgetbv" : "=a"(xcr0), "=d"(dont_need_edx) : "c"(0));
2122
2123 if ((xcr0 & (1u<<1)) && // XMM register state saved?
2124 (xcr0 & (1u<<2)) && // YMM register state saved?
2125 (ebx & (1u<<5))) { // AVX2
2126 // At this point we're at least HSW. Continue checking for SKX.
2127 if ((xcr0 & (1u<< 5)) && // Opmasks state saved?
2128 (xcr0 & (1u<< 6)) && // First 16 ZMM registers saved?
2129 (xcr0 & (1u<< 7)) && // High 16 ZMM registers saved?
2130 (ebx & (1u<<16)) && // AVX512F
2131 (ebx & (1u<<17)) && // AVX512DQ
2132 (ebx & (1u<<28)) && // AVX512CD
2133 (ebx & (1u<<30)) && // AVX512BW
2134 (ebx & (1u<<31))) { // AVX512VL
2135 return CpuType::SKX;
2136 }
2137 return CpuType::HSW;
2138 }
2139 }
2140 return CpuType::None;
2141 }();
2142 return type;
2143 }
2144 #endif
2145
2146 #endif
2147
2148 typedef struct {
2149 Op op;
2150 const void* arg;
2151 } OpAndArg;
2152
select_curve_op(const skcms_Curve * curve,int channel)2153 static OpAndArg select_curve_op(const skcms_Curve* curve, int channel) {
2154 static const struct { Op sRGBish, PQish, HLGish, HLGinvish, table; } ops[] = {
2155 { Op_tf_r, Op_pq_r, Op_hlg_r, Op_hlginv_r, Op_table_r },
2156 { Op_tf_g, Op_pq_g, Op_hlg_g, Op_hlginv_g, Op_table_g },
2157 { Op_tf_b, Op_pq_b, Op_hlg_b, Op_hlginv_b, Op_table_b },
2158 { Op_tf_a, Op_pq_a, Op_hlg_a, Op_hlginv_a, Op_table_a },
2159 };
2160 const auto& op = ops[channel];
2161
2162 if (curve->table_entries == 0) {
2163 const OpAndArg noop = { Op_load_a8/*doesn't matter*/, nullptr };
2164
2165 const skcms_TransferFunction& tf = curve->parametric;
2166
2167 if (tf.g == 1 && tf.a == 1 &&
2168 tf.b == 0 && tf.c == 0 && tf.d == 0 && tf.e == 0 && tf.f == 0) {
2169 return noop;
2170 }
2171
2172 switch (classify(tf)) {
2173 case Bad: return noop;
2174 case sRGBish: return OpAndArg{op.sRGBish, &tf};
2175 case PQish: return OpAndArg{op.PQish, &tf};
2176 case HLGish: return OpAndArg{op.HLGish, &tf};
2177 case HLGinvish: return OpAndArg{op.HLGinvish, &tf};
2178 }
2179 }
2180 return OpAndArg{op.table, curve};
2181 }
2182
bytes_per_pixel(skcms_PixelFormat fmt)2183 static size_t bytes_per_pixel(skcms_PixelFormat fmt) {
2184 switch (fmt >> 1) { // ignore rgb/bgr
2185 case skcms_PixelFormat_A_8 >> 1: return 1;
2186 case skcms_PixelFormat_G_8 >> 1: return 1;
2187 case skcms_PixelFormat_RGBA_8888_Palette8 >> 1: return 1;
2188 case skcms_PixelFormat_ABGR_4444 >> 1: return 2;
2189 case skcms_PixelFormat_RGB_565 >> 1: return 2;
2190 case skcms_PixelFormat_RGB_888 >> 1: return 3;
2191 case skcms_PixelFormat_RGBA_8888 >> 1: return 4;
2192 case skcms_PixelFormat_RGBA_1010102 >> 1: return 4;
2193 case skcms_PixelFormat_RGB_161616LE >> 1: return 6;
2194 case skcms_PixelFormat_RGBA_16161616LE >> 1: return 8;
2195 case skcms_PixelFormat_RGB_161616BE >> 1: return 6;
2196 case skcms_PixelFormat_RGBA_16161616BE >> 1: return 8;
2197 case skcms_PixelFormat_RGB_hhh_Norm >> 1: return 6;
2198 case skcms_PixelFormat_RGBA_hhhh_Norm >> 1: return 8;
2199 case skcms_PixelFormat_RGB_hhh >> 1: return 6;
2200 case skcms_PixelFormat_RGBA_hhhh >> 1: return 8;
2201 case skcms_PixelFormat_RGB_fff >> 1: return 12;
2202 case skcms_PixelFormat_RGBA_ffff >> 1: return 16;
2203 }
2204 assert(false);
2205 return 0;
2206 }
2207
prep_for_destination(const skcms_ICCProfile * profile,skcms_Matrix3x3 * fromXYZD50,skcms_TransferFunction * invR,skcms_TransferFunction * invG,skcms_TransferFunction * invB)2208 static bool prep_for_destination(const skcms_ICCProfile* profile,
2209 skcms_Matrix3x3* fromXYZD50,
2210 skcms_TransferFunction* invR,
2211 skcms_TransferFunction* invG,
2212 skcms_TransferFunction* invB) {
2213 // We only support destinations with parametric transfer functions
2214 // and with gamuts that can be transformed from XYZD50.
2215 return profile->has_trc
2216 && profile->has_toXYZD50
2217 && profile->trc[0].table_entries == 0
2218 && profile->trc[1].table_entries == 0
2219 && profile->trc[2].table_entries == 0
2220 && skcms_TransferFunction_invert(&profile->trc[0].parametric, invR)
2221 && skcms_TransferFunction_invert(&profile->trc[1].parametric, invG)
2222 && skcms_TransferFunction_invert(&profile->trc[2].parametric, invB)
2223 && skcms_Matrix3x3_invert(&profile->toXYZD50, fromXYZD50);
2224 }
2225
skcms_Transform(const void * src,skcms_PixelFormat srcFmt,skcms_AlphaFormat srcAlpha,const skcms_ICCProfile * srcProfile,void * dst,skcms_PixelFormat dstFmt,skcms_AlphaFormat dstAlpha,const skcms_ICCProfile * dstProfile,size_t npixels)2226 bool skcms_Transform(const void* src,
2227 skcms_PixelFormat srcFmt,
2228 skcms_AlphaFormat srcAlpha,
2229 const skcms_ICCProfile* srcProfile,
2230 void* dst,
2231 skcms_PixelFormat dstFmt,
2232 skcms_AlphaFormat dstAlpha,
2233 const skcms_ICCProfile* dstProfile,
2234 size_t npixels) {
2235 return skcms_TransformWithPalette(src, srcFmt, srcAlpha, srcProfile,
2236 dst, dstFmt, dstAlpha, dstProfile,
2237 npixels, nullptr);
2238 }
2239
skcms_TransformWithPalette(const void * src,skcms_PixelFormat srcFmt,skcms_AlphaFormat srcAlpha,const skcms_ICCProfile * srcProfile,void * dst,skcms_PixelFormat dstFmt,skcms_AlphaFormat dstAlpha,const skcms_ICCProfile * dstProfile,size_t nz,const void * palette)2240 bool skcms_TransformWithPalette(const void* src,
2241 skcms_PixelFormat srcFmt,
2242 skcms_AlphaFormat srcAlpha,
2243 const skcms_ICCProfile* srcProfile,
2244 void* dst,
2245 skcms_PixelFormat dstFmt,
2246 skcms_AlphaFormat dstAlpha,
2247 const skcms_ICCProfile* dstProfile,
2248 size_t nz,
2249 const void* palette) {
2250 const size_t dst_bpp = bytes_per_pixel(dstFmt),
2251 src_bpp = bytes_per_pixel(srcFmt);
2252 // Let's just refuse if the request is absurdly big.
2253 if (nz * dst_bpp > INT_MAX || nz * src_bpp > INT_MAX) {
2254 return false;
2255 }
2256 int n = (int)nz;
2257
2258 // Null profiles default to sRGB. Passing null for both is handy when doing format conversion.
2259 if (!srcProfile) {
2260 srcProfile = skcms_sRGB_profile();
2261 }
2262 if (!dstProfile) {
2263 dstProfile = skcms_sRGB_profile();
2264 }
2265
2266 // We can't transform in place unless the PixelFormats are the same size.
2267 if (dst == src && dst_bpp != src_bpp) {
2268 return false;
2269 }
2270 // TODO: more careful alias rejection (like, dst == src + 1)?
2271
2272 if (needs_palette(srcFmt) && !palette) {
2273 return false;
2274 }
2275
2276 Op program [32];
2277 const void* arguments[32];
2278
2279 Op* ops = program;
2280 const void** args = arguments;
2281
2282 // These are always parametric curves of some sort.
2283 skcms_Curve dst_curves[3];
2284 dst_curves[0].table_entries =
2285 dst_curves[1].table_entries =
2286 dst_curves[2].table_entries = 0;
2287
2288 skcms_Matrix3x3 from_xyz;
2289
2290 switch (srcFmt >> 1) {
2291 default: return false;
2292 case skcms_PixelFormat_A_8 >> 1: *ops++ = Op_load_a8; break;
2293 case skcms_PixelFormat_G_8 >> 1: *ops++ = Op_load_g8; break;
2294 case skcms_PixelFormat_ABGR_4444 >> 1: *ops++ = Op_load_4444; break;
2295 case skcms_PixelFormat_RGB_565 >> 1: *ops++ = Op_load_565; break;
2296 case skcms_PixelFormat_RGB_888 >> 1: *ops++ = Op_load_888; break;
2297 case skcms_PixelFormat_RGBA_8888 >> 1: *ops++ = Op_load_8888; break;
2298 case skcms_PixelFormat_RGBA_1010102 >> 1: *ops++ = Op_load_1010102; break;
2299 case skcms_PixelFormat_RGB_161616LE >> 1: *ops++ = Op_load_161616LE; break;
2300 case skcms_PixelFormat_RGBA_16161616LE >> 1: *ops++ = Op_load_16161616LE; break;
2301 case skcms_PixelFormat_RGB_161616BE >> 1: *ops++ = Op_load_161616BE; break;
2302 case skcms_PixelFormat_RGBA_16161616BE >> 1: *ops++ = Op_load_16161616BE; break;
2303 case skcms_PixelFormat_RGB_hhh_Norm >> 1: *ops++ = Op_load_hhh; break;
2304 case skcms_PixelFormat_RGBA_hhhh_Norm >> 1: *ops++ = Op_load_hhhh; break;
2305 case skcms_PixelFormat_RGB_hhh >> 1: *ops++ = Op_load_hhh; break;
2306 case skcms_PixelFormat_RGBA_hhhh >> 1: *ops++ = Op_load_hhhh; break;
2307 case skcms_PixelFormat_RGB_fff >> 1: *ops++ = Op_load_fff; break;
2308 case skcms_PixelFormat_RGBA_ffff >> 1: *ops++ = Op_load_ffff; break;
2309
2310 case skcms_PixelFormat_RGBA_8888_Palette8 >> 1: *ops++ = Op_load_8888_palette8;
2311 *args++ = palette;
2312 break;
2313 }
2314 if (srcFmt == skcms_PixelFormat_RGB_hhh_Norm ||
2315 srcFmt == skcms_PixelFormat_RGBA_hhhh_Norm) {
2316 *ops++ = Op_clamp;
2317 }
2318 if (srcFmt & 1) {
2319 *ops++ = Op_swap_rb;
2320 }
2321 skcms_ICCProfile gray_dst_profile;
2322 if ((dstFmt >> 1) == (skcms_PixelFormat_G_8 >> 1)) {
2323 // When transforming to gray, stop at XYZ (by setting toXYZ to identity), then transform
2324 // luminance (Y) by the destination transfer function.
2325 gray_dst_profile = *dstProfile;
2326 skcms_SetXYZD50(&gray_dst_profile, &skcms_XYZD50_profile()->toXYZD50);
2327 dstProfile = &gray_dst_profile;
2328 }
2329
2330 if (srcProfile->data_color_space == skcms_Signature_CMYK) {
2331 // Photoshop creates CMYK images as inverse CMYK.
2332 // These happen to be the only ones we've _ever_ seen.
2333 *ops++ = Op_invert;
2334 // With CMYK, ignore the alpha type, to avoid changing K or conflating CMY with K.
2335 srcAlpha = skcms_AlphaFormat_Unpremul;
2336 }
2337
2338 if (srcAlpha == skcms_AlphaFormat_Opaque) {
2339 *ops++ = Op_force_opaque;
2340 } else if (srcAlpha == skcms_AlphaFormat_PremulAsEncoded) {
2341 *ops++ = Op_unpremul;
2342 }
2343
2344 if (dstProfile != srcProfile) {
2345
2346 if (!prep_for_destination(dstProfile,
2347 &from_xyz,
2348 &dst_curves[0].parametric,
2349 &dst_curves[1].parametric,
2350 &dst_curves[2].parametric)) {
2351 return false;
2352 }
2353
2354 if (srcProfile->has_A2B) {
2355 if (srcProfile->A2B.input_channels) {
2356 for (int i = 0; i < (int)srcProfile->A2B.input_channels; i++) {
2357 OpAndArg oa = select_curve_op(&srcProfile->A2B.input_curves[i], i);
2358 if (oa.arg) {
2359 *ops++ = oa.op;
2360 *args++ = oa.arg;
2361 }
2362 }
2363 *ops++ = Op_clamp;
2364 *ops++ = Op_clut;
2365 *args++ = &srcProfile->A2B;
2366 }
2367
2368 if (srcProfile->A2B.matrix_channels == 3) {
2369 for (int i = 0; i < 3; i++) {
2370 OpAndArg oa = select_curve_op(&srcProfile->A2B.matrix_curves[i], i);
2371 if (oa.arg) {
2372 *ops++ = oa.op;
2373 *args++ = oa.arg;
2374 }
2375 }
2376
2377 static const skcms_Matrix3x4 I = {{
2378 {1,0,0,0},
2379 {0,1,0,0},
2380 {0,0,1,0},
2381 }};
2382 if (0 != memcmp(&I, &srcProfile->A2B.matrix, sizeof(I))) {
2383 *ops++ = Op_matrix_3x4;
2384 *args++ = &srcProfile->A2B.matrix;
2385 }
2386 }
2387
2388 if (srcProfile->A2B.output_channels == 3) {
2389 for (int i = 0; i < 3; i++) {
2390 OpAndArg oa = select_curve_op(&srcProfile->A2B.output_curves[i], i);
2391 if (oa.arg) {
2392 *ops++ = oa.op;
2393 *args++ = oa.arg;
2394 }
2395 }
2396 }
2397
2398 if (srcProfile->pcs == skcms_Signature_Lab) {
2399 *ops++ = Op_lab_to_xyz;
2400 }
2401
2402 } else if (srcProfile->has_trc && srcProfile->has_toXYZD50) {
2403 for (int i = 0; i < 3; i++) {
2404 OpAndArg oa = select_curve_op(&srcProfile->trc[i], i);
2405 if (oa.arg) {
2406 *ops++ = oa.op;
2407 *args++ = oa.arg;
2408 }
2409 }
2410 } else {
2411 return false;
2412 }
2413
2414 // A2B sources should already be in XYZD50 at this point.
2415 // Others still need to be transformed using their toXYZD50 matrix.
2416 // N.B. There are profiles that contain both A2B tags and toXYZD50 matrices.
2417 // If we use the A2B tags, we need to ignore the XYZD50 matrix entirely.
2418 assert (srcProfile->has_A2B || srcProfile->has_toXYZD50);
2419 static const skcms_Matrix3x3 I = {{
2420 { 1.0f, 0.0f, 0.0f },
2421 { 0.0f, 1.0f, 0.0f },
2422 { 0.0f, 0.0f, 1.0f },
2423 }};
2424 const skcms_Matrix3x3* to_xyz = srcProfile->has_A2B ? &I : &srcProfile->toXYZD50;
2425
2426 // There's a chance the source and destination gamuts are identical,
2427 // in which case we can skip the gamut transform.
2428 if (0 != memcmp(&dstProfile->toXYZD50, to_xyz, sizeof(skcms_Matrix3x3))) {
2429 // Concat the entire gamut transform into from_xyz,
2430 // now slightly misnamed but it's a handy spot to stash the result.
2431 from_xyz = skcms_Matrix3x3_concat(&from_xyz, to_xyz);
2432 *ops++ = Op_matrix_3x3;
2433 *args++ = &from_xyz;
2434 }
2435
2436 // Encode back to dst RGB using its parametric transfer functions.
2437 for (int i = 0; i < 3; i++) {
2438 OpAndArg oa = select_curve_op(dst_curves+i, i);
2439 if (oa.arg) {
2440 assert (oa.op != Op_table_r &&
2441 oa.op != Op_table_g &&
2442 oa.op != Op_table_b &&
2443 oa.op != Op_table_a);
2444 *ops++ = oa.op;
2445 *args++ = oa.arg;
2446 }
2447 }
2448 }
2449
2450 // Clamp here before premul to make sure we're clamping to normalized values _and_ gamut,
2451 // not just to values that fit in [0,1].
2452 //
2453 // E.g. r = 1.1, a = 0.5 would fit fine in fixed point after premul (ra=0.55,a=0.5),
2454 // but would be carrying r > 1, which is really unexpected for downstream consumers.
2455 if (dstFmt < skcms_PixelFormat_RGB_hhh) {
2456 *ops++ = Op_clamp;
2457 }
2458 if (dstAlpha == skcms_AlphaFormat_Opaque) {
2459 *ops++ = Op_force_opaque;
2460 } else if (dstAlpha == skcms_AlphaFormat_PremulAsEncoded) {
2461 *ops++ = Op_premul;
2462 }
2463 if (dstFmt & 1) {
2464 *ops++ = Op_swap_rb;
2465 }
2466 switch (dstFmt >> 1) {
2467 default: return false;
2468 case skcms_PixelFormat_A_8 >> 1: *ops++ = Op_store_a8; break;
2469 case skcms_PixelFormat_G_8 >> 1: *ops++ = Op_store_g8; break;
2470 case skcms_PixelFormat_ABGR_4444 >> 1: *ops++ = Op_store_4444; break;
2471 case skcms_PixelFormat_RGB_565 >> 1: *ops++ = Op_store_565; break;
2472 case skcms_PixelFormat_RGB_888 >> 1: *ops++ = Op_store_888; break;
2473 case skcms_PixelFormat_RGBA_8888 >> 1: *ops++ = Op_store_8888; break;
2474 case skcms_PixelFormat_RGBA_1010102 >> 1: *ops++ = Op_store_1010102; break;
2475 case skcms_PixelFormat_RGB_161616LE >> 1: *ops++ = Op_store_161616LE; break;
2476 case skcms_PixelFormat_RGBA_16161616LE >> 1: *ops++ = Op_store_16161616LE; break;
2477 case skcms_PixelFormat_RGB_161616BE >> 1: *ops++ = Op_store_161616BE; break;
2478 case skcms_PixelFormat_RGBA_16161616BE >> 1: *ops++ = Op_store_16161616BE; break;
2479 case skcms_PixelFormat_RGB_hhh_Norm >> 1: *ops++ = Op_store_hhh; break;
2480 case skcms_PixelFormat_RGBA_hhhh_Norm >> 1: *ops++ = Op_store_hhhh; break;
2481 case skcms_PixelFormat_RGB_hhh >> 1: *ops++ = Op_store_hhh; break;
2482 case skcms_PixelFormat_RGBA_hhhh >> 1: *ops++ = Op_store_hhhh; break;
2483 case skcms_PixelFormat_RGB_fff >> 1: *ops++ = Op_store_fff; break;
2484 case skcms_PixelFormat_RGBA_ffff >> 1: *ops++ = Op_store_ffff; break;
2485 }
2486
2487 auto run = baseline::run_program;
2488 #if defined(TEST_FOR_HSW)
2489 switch (cpu_type()) {
2490 case CpuType::None: break;
2491 case CpuType::HSW: run = hsw::run_program; break;
2492 case CpuType::SKX: run = hsw::run_program; break;
2493 }
2494 #endif
2495 #if defined(TEST_FOR_SKX)
2496 switch (cpu_type()) {
2497 case CpuType::None: break;
2498 case CpuType::HSW: break;
2499 case CpuType::SKX: run = skx::run_program; break;
2500 }
2501 #endif
2502 run(program, arguments, (const char*)src, (char*)dst, n, src_bpp,dst_bpp);
2503 return true;
2504 }
2505
assert_usable_as_destination(const skcms_ICCProfile * profile)2506 static void assert_usable_as_destination(const skcms_ICCProfile* profile) {
2507 #if defined(NDEBUG)
2508 (void)profile;
2509 #else
2510 skcms_Matrix3x3 fromXYZD50;
2511 skcms_TransferFunction invR, invG, invB;
2512 assert(prep_for_destination(profile, &fromXYZD50, &invR, &invG, &invB));
2513 #endif
2514 }
2515
skcms_MakeUsableAsDestination(skcms_ICCProfile * profile)2516 bool skcms_MakeUsableAsDestination(skcms_ICCProfile* profile) {
2517 skcms_Matrix3x3 fromXYZD50;
2518 if (!profile->has_trc || !profile->has_toXYZD50
2519 || !skcms_Matrix3x3_invert(&profile->toXYZD50, &fromXYZD50)) {
2520 return false;
2521 }
2522
2523 skcms_TransferFunction tf[3];
2524 for (int i = 0; i < 3; i++) {
2525 skcms_TransferFunction inv;
2526 if (profile->trc[i].table_entries == 0
2527 && skcms_TransferFunction_invert(&profile->trc[i].parametric, &inv)) {
2528 tf[i] = profile->trc[i].parametric;
2529 continue;
2530 }
2531
2532 float max_error;
2533 // Parametric curves from skcms_ApproximateCurve() are guaranteed to be invertible.
2534 if (!skcms_ApproximateCurve(&profile->trc[i], &tf[i], &max_error)) {
2535 return false;
2536 }
2537 }
2538
2539 for (int i = 0; i < 3; ++i) {
2540 profile->trc[i].table_entries = 0;
2541 profile->trc[i].parametric = tf[i];
2542 }
2543
2544 assert_usable_as_destination(profile);
2545 return true;
2546 }
2547
skcms_MakeUsableAsDestinationWithSingleCurve(skcms_ICCProfile * profile)2548 bool skcms_MakeUsableAsDestinationWithSingleCurve(skcms_ICCProfile* profile) {
2549 // Operate on a copy of profile, so we can choose the best TF for the original curves
2550 skcms_ICCProfile result = *profile;
2551 if (!skcms_MakeUsableAsDestination(&result)) {
2552 return false;
2553 }
2554
2555 int best_tf = 0;
2556 float min_max_error = INFINITY_;
2557 for (int i = 0; i < 3; i++) {
2558 skcms_TransferFunction inv;
2559 if (!skcms_TransferFunction_invert(&result.trc[i].parametric, &inv)) {
2560 return false;
2561 }
2562
2563 float err = 0;
2564 for (int j = 0; j < 3; ++j) {
2565 err = fmaxf_(err, max_roundtrip_error(&profile->trc[j], &inv));
2566 }
2567 if (min_max_error > err) {
2568 min_max_error = err;
2569 best_tf = i;
2570 }
2571 }
2572
2573 for (int i = 0; i < 3; i++) {
2574 result.trc[i].parametric = result.trc[best_tf].parametric;
2575 }
2576
2577 *profile = result;
2578 assert_usable_as_destination(profile);
2579 return true;
2580 }
2581