1 /*
2 * Copyright 2018 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #include "skcms.h"
9 #include "skcms_internal.h"
10 #include <assert.h>
11 #include <float.h>
12 #include <limits.h>
13 #include <stdlib.h>
14 #include <string.h>
15
16 #if defined(__ARM_NEON)
17 #include <arm_neon.h>
18 #elif defined(__SSE__)
19 #include <immintrin.h>
20
21 #if defined(__clang__)
22 // That #include <immintrin.h> is usually enough, but Clang's headers
23 // "helpfully" skip including the whole kitchen sink when _MSC_VER is
24 // defined, because lots of programs on Windows would include that and
25 // it'd be a lot slower. But we want all those headers included so we
26 // can use their features after runtime checks later.
27 #include <smmintrin.h>
28 #include <avxintrin.h>
29 #include <avx2intrin.h>
30 #include <avx512fintrin.h>
31 #include <avx512dqintrin.h>
32 #endif
33 #endif
34
35 static bool runtime_cpu_detection = true;
skcms_DisableRuntimeCPUDetection()36 void skcms_DisableRuntimeCPUDetection() {
37 runtime_cpu_detection = false;
38 }
39
40 // sizeof(x) will return size_t, which is 32-bit on some machines and 64-bit on others.
41 // We have better testing on 64-bit machines, so force 32-bit machines to behave like 64-bit.
42 //
43 // Please do not use sizeof() directly, and size_t only when required.
44 // (We have no way of enforcing these requests...)
45 #define SAFE_SIZEOF(x) ((uint64_t)sizeof(x))
46
47 // Same sort of thing for _Layout structs with a variable sized array at the end (named "variable").
48 #define SAFE_FIXED_SIZE(type) ((uint64_t)offsetof(type, variable))
49
50 static const union {
51 uint32_t bits;
52 float f;
53 } inf_ = { 0x7f800000 };
54 #define INFINITY_ inf_.f
55
56 #if defined(__clang__) || defined(__GNUC__)
57 #define small_memcpy __builtin_memcpy
58 #else
59 #define small_memcpy memcpy
60 #endif
61
log2f_(float x)62 static float log2f_(float x) {
63 // The first approximation of log2(x) is its exponent 'e', minus 127.
64 int32_t bits;
65 small_memcpy(&bits, &x, sizeof(bits));
66
67 float e = (float)bits * (1.0f / (1<<23));
68
69 // If we use the mantissa too we can refine the error signficantly.
70 int32_t m_bits = (bits & 0x007fffff) | 0x3f000000;
71 float m;
72 small_memcpy(&m, &m_bits, sizeof(m));
73
74 return (e - 124.225514990f
75 - 1.498030302f*m
76 - 1.725879990f/(0.3520887068f + m));
77 }
logf_(float x)78 static float logf_(float x) {
79 const float ln2 = 0.69314718f;
80 return ln2*log2f_(x);
81 }
82
exp2f_(float x)83 static float exp2f_(float x) {
84 float fract = x - floorf_(x);
85
86 float fbits = (1.0f * (1<<23)) * (x + 121.274057500f
87 - 1.490129070f*fract
88 + 27.728023300f/(4.84252568f - fract));
89
90 // Before we cast fbits to int32_t, check for out of range values to pacify UBSAN.
91 // INT_MAX is not exactly representable as a float, so exclude it as effectively infinite.
92 // Negative values are effectively underflow - we'll end up returning a (different) negative
93 // value, which makes no sense. So clamp to zero.
94 if (fbits >= (float)INT_MAX) {
95 return INFINITY_;
96 } else if (fbits < 0) {
97 return 0;
98 }
99
100 int32_t bits = (int32_t)fbits;
101 small_memcpy(&x, &bits, sizeof(x));
102 return x;
103 }
104
105 // Not static, as it's used by some test tools.
powf_(float x,float y)106 float powf_(float x, float y) {
107 assert (x >= 0);
108 return (x == 0) || (x == 1) ? x
109 : exp2f_(log2f_(x) * y);
110 }
111
expf_(float x)112 static float expf_(float x) {
113 const float log2_e = 1.4426950408889634074f;
114 return exp2f_(log2_e * x);
115 }
116
fmaxf_(float x,float y)117 static float fmaxf_(float x, float y) { return x > y ? x : y; }
fminf_(float x,float y)118 static float fminf_(float x, float y) { return x < y ? x : y; }
119
isfinitef_(float x)120 static bool isfinitef_(float x) { return 0 == x*0; }
121
minus_1_ulp(float x)122 static float minus_1_ulp(float x) {
123 int32_t bits;
124 memcpy(&bits, &x, sizeof(bits));
125 bits = bits - 1;
126 memcpy(&x, &bits, sizeof(bits));
127 return x;
128 }
129
130 // Most transfer functions we work with are sRGBish.
131 // For exotic HDR transfer functions, we encode them using a tf.g that makes no sense,
132 // and repurpose the other fields to hold the parameters of the HDR functions.
133 enum TFKind { Bad, sRGBish, PQish, HLGish, HLGinvish };
134 struct TF_PQish { float A,B,C,D,E,F; };
135 struct TF_HLGish { float R,G,a,b,c; };
136
TFKind_marker(TFKind kind)137 static float TFKind_marker(TFKind kind) {
138 // We'd use different NaNs, but those aren't guaranteed to be preserved by WASM.
139 return -(float)kind;
140 }
141
classify(const skcms_TransferFunction & tf,TF_PQish * pq=nullptr,TF_HLGish * hlg=nullptr)142 static TFKind classify(const skcms_TransferFunction& tf, TF_PQish* pq = nullptr
143 , TF_HLGish* hlg = nullptr) {
144 if (tf.g < 0 && (int)tf.g == tf.g) {
145 // TODO: sanity checks for PQ/HLG like we do for sRGBish.
146 switch ((int)tf.g) {
147 case -PQish: if (pq ) { memcpy(pq , &tf.a, sizeof(*pq )); } return PQish;
148 case -HLGish: if (hlg) { memcpy(hlg, &tf.a, sizeof(*hlg)); } return HLGish;
149 case -HLGinvish: if (hlg) { memcpy(hlg, &tf.a, sizeof(*hlg)); } return HLGinvish;
150 }
151 return Bad;
152 }
153
154 // Basic sanity checks for sRGBish transfer functions.
155 if (isfinitef_(tf.a + tf.b + tf.c + tf.d + tf.e + tf.f + tf.g)
156 // a,c,d,g should be non-negative to make any sense.
157 && tf.a >= 0
158 && tf.c >= 0
159 && tf.d >= 0
160 && tf.g >= 0
161 // Raising a negative value to a fractional tf->g produces complex numbers.
162 && tf.a * tf.d + tf.b >= 0) {
163 return sRGBish;
164 }
165
166 return Bad;
167 }
168
skcms_TransferFunction_isSRGBish(const skcms_TransferFunction * tf)169 bool skcms_TransferFunction_isSRGBish(const skcms_TransferFunction* tf) {
170 return classify(*tf) == sRGBish;
171 }
skcms_TransferFunction_isPQish(const skcms_TransferFunction * tf)172 bool skcms_TransferFunction_isPQish(const skcms_TransferFunction* tf) {
173 return classify(*tf) == PQish;
174 }
skcms_TransferFunction_isHLGish(const skcms_TransferFunction * tf)175 bool skcms_TransferFunction_isHLGish(const skcms_TransferFunction* tf) {
176 return classify(*tf) == HLGish;
177 }
178
skcms_TransferFunction_makePQish(skcms_TransferFunction * tf,float A,float B,float C,float D,float E,float F)179 bool skcms_TransferFunction_makePQish(skcms_TransferFunction* tf,
180 float A, float B, float C,
181 float D, float E, float F) {
182 *tf = { TFKind_marker(PQish), A,B,C,D,E,F };
183 assert(skcms_TransferFunction_isPQish(tf));
184 return true;
185 }
186
skcms_TransferFunction_makeHLGish(skcms_TransferFunction * tf,float R,float G,float a,float b,float c)187 bool skcms_TransferFunction_makeHLGish(skcms_TransferFunction* tf,
188 float R, float G,
189 float a, float b, float c) {
190 *tf = { TFKind_marker(HLGish), R,G, a,b,c, 0 };
191 assert(skcms_TransferFunction_isHLGish(tf));
192 return true;
193 }
194
skcms_TransferFunction_eval(const skcms_TransferFunction * tf,float x)195 float skcms_TransferFunction_eval(const skcms_TransferFunction* tf, float x) {
196 float sign = x < 0 ? -1.0f : 1.0f;
197 x *= sign;
198
199 TF_PQish pq;
200 TF_HLGish hlg;
201 switch (classify(*tf, &pq, &hlg)) {
202 case Bad: break;
203
204 case HLGish: return sign * (x*hlg.R <= 1 ? powf_(x*hlg.R, hlg.G)
205 : expf_((x-hlg.c)*hlg.a) + hlg.b);
206
207 // skcms_TransferFunction_invert() inverts R, G, and a for HLGinvish so this math is fast.
208 case HLGinvish: return sign * (x <= 1 ? hlg.R * powf_(x, hlg.G)
209 : hlg.a * logf_(x - hlg.b) + hlg.c);
210
211
212 case sRGBish: return sign * (x < tf->d ? tf->c * x + tf->f
213 : powf_(tf->a * x + tf->b, tf->g) + tf->e);
214
215 case PQish: return sign * powf_(fmaxf_(pq.A + pq.B * powf_(x, pq.C), 0)
216 / (pq.D + pq.E * powf_(x, pq.C)),
217 pq.F);
218 }
219 return 0;
220 }
221
222
eval_curve(const skcms_Curve * curve,float x)223 static float eval_curve(const skcms_Curve* curve, float x) {
224 if (curve->table_entries == 0) {
225 return skcms_TransferFunction_eval(&curve->parametric, x);
226 }
227
228 float ix = fmaxf_(0, fminf_(x, 1)) * (curve->table_entries - 1);
229 int lo = (int) ix ,
230 hi = (int)(float)minus_1_ulp(ix + 1.0f);
231 float t = ix - (float)lo;
232
233 float l, h;
234 if (curve->table_8) {
235 l = curve->table_8[lo] * (1/255.0f);
236 h = curve->table_8[hi] * (1/255.0f);
237 } else {
238 uint16_t be_l, be_h;
239 memcpy(&be_l, curve->table_16 + 2*lo, 2);
240 memcpy(&be_h, curve->table_16 + 2*hi, 2);
241 uint16_t le_l = ((be_l << 8) | (be_l >> 8)) & 0xffff;
242 uint16_t le_h = ((be_h << 8) | (be_h >> 8)) & 0xffff;
243 l = le_l * (1/65535.0f);
244 h = le_h * (1/65535.0f);
245 }
246 return l + (h-l)*t;
247 }
248
skcms_MaxRoundtripError(const skcms_Curve * curve,const skcms_TransferFunction * inv_tf)249 float skcms_MaxRoundtripError(const skcms_Curve* curve, const skcms_TransferFunction* inv_tf) {
250 uint32_t N = curve->table_entries > 256 ? curve->table_entries : 256;
251 const float dx = 1.0f / (N - 1);
252 float err = 0;
253 for (uint32_t i = 0; i < N; i++) {
254 float x = i * dx,
255 y = eval_curve(curve, x);
256 err = fmaxf_(err, fabsf_(x - skcms_TransferFunction_eval(inv_tf, y)));
257 }
258 return err;
259 }
260
skcms_AreApproximateInverses(const skcms_Curve * curve,const skcms_TransferFunction * inv_tf)261 bool skcms_AreApproximateInverses(const skcms_Curve* curve, const skcms_TransferFunction* inv_tf) {
262 return skcms_MaxRoundtripError(curve, inv_tf) < (1/512.0f);
263 }
264
265 // Additional ICC signature values that are only used internally
266 enum {
267 // File signature
268 skcms_Signature_acsp = 0x61637370,
269
270 // Tag signatures
271 skcms_Signature_rTRC = 0x72545243,
272 skcms_Signature_gTRC = 0x67545243,
273 skcms_Signature_bTRC = 0x62545243,
274 skcms_Signature_kTRC = 0x6B545243,
275
276 skcms_Signature_rXYZ = 0x7258595A,
277 skcms_Signature_gXYZ = 0x6758595A,
278 skcms_Signature_bXYZ = 0x6258595A,
279
280 skcms_Signature_A2B0 = 0x41324230,
281 skcms_Signature_A2B1 = 0x41324231,
282 skcms_Signature_mAB = 0x6D414220,
283
284 skcms_Signature_CHAD = 0x63686164,
285 skcms_Signature_WTPT = 0x77747074,
286
287 // Type signatures
288 skcms_Signature_curv = 0x63757276,
289 skcms_Signature_mft1 = 0x6D667431,
290 skcms_Signature_mft2 = 0x6D667432,
291 skcms_Signature_para = 0x70617261,
292 skcms_Signature_sf32 = 0x73663332,
293 // XYZ is also a PCS signature, so it's defined in skcms.h
294 // skcms_Signature_XYZ = 0x58595A20,
295 };
296
read_big_u16(const uint8_t * ptr)297 static uint16_t read_big_u16(const uint8_t* ptr) {
298 uint16_t be;
299 memcpy(&be, ptr, sizeof(be));
300 #if defined(_MSC_VER)
301 return _byteswap_ushort(be);
302 #else
303 return __builtin_bswap16(be);
304 #endif
305 }
306
read_big_u32(const uint8_t * ptr)307 static uint32_t read_big_u32(const uint8_t* ptr) {
308 uint32_t be;
309 memcpy(&be, ptr, sizeof(be));
310 #if defined(_MSC_VER)
311 return _byteswap_ulong(be);
312 #else
313 return __builtin_bswap32(be);
314 #endif
315 }
316
read_big_i32(const uint8_t * ptr)317 static int32_t read_big_i32(const uint8_t* ptr) {
318 return (int32_t)read_big_u32(ptr);
319 }
320
read_big_fixed(const uint8_t * ptr)321 static float read_big_fixed(const uint8_t* ptr) {
322 return read_big_i32(ptr) * (1.0f / 65536.0f);
323 }
324
325 // Maps to an in-memory profile so that fields line up to the locations specified
326 // in ICC.1:2010, section 7.2
327 typedef struct {
328 uint8_t size [ 4];
329 uint8_t cmm_type [ 4];
330 uint8_t version [ 4];
331 uint8_t profile_class [ 4];
332 uint8_t data_color_space [ 4];
333 uint8_t pcs [ 4];
334 uint8_t creation_date_time [12];
335 uint8_t signature [ 4];
336 uint8_t platform [ 4];
337 uint8_t flags [ 4];
338 uint8_t device_manufacturer [ 4];
339 uint8_t device_model [ 4];
340 uint8_t device_attributes [ 8];
341 uint8_t rendering_intent [ 4];
342 uint8_t illuminant_X [ 4];
343 uint8_t illuminant_Y [ 4];
344 uint8_t illuminant_Z [ 4];
345 uint8_t creator [ 4];
346 uint8_t profile_id [16];
347 uint8_t reserved [28];
348 uint8_t tag_count [ 4]; // Technically not part of header, but required
349 } header_Layout;
350
351 typedef struct {
352 uint8_t signature [4];
353 uint8_t offset [4];
354 uint8_t size [4];
355 } tag_Layout;
356
get_tag_table(const skcms_ICCProfile * profile)357 static const tag_Layout* get_tag_table(const skcms_ICCProfile* profile) {
358 return (const tag_Layout*)(profile->buffer + SAFE_SIZEOF(header_Layout));
359 }
360
361 // s15Fixed16ArrayType is technically variable sized, holding N values. However, the only valid
362 // use of the type is for the CHAD tag that stores exactly nine values.
363 typedef struct {
364 uint8_t type [ 4];
365 uint8_t reserved [ 4];
366 uint8_t values [36];
367 } sf32_Layout;
368
skcms_GetCHAD(const skcms_ICCProfile * profile,skcms_Matrix3x3 * m)369 bool skcms_GetCHAD(const skcms_ICCProfile* profile, skcms_Matrix3x3* m) {
370 skcms_ICCTag tag;
371 if (!skcms_GetTagBySignature(profile, skcms_Signature_CHAD, &tag)) {
372 return false;
373 }
374
375 if (tag.type != skcms_Signature_sf32 || tag.size < SAFE_SIZEOF(sf32_Layout)) {
376 return false;
377 }
378
379 const sf32_Layout* sf32Tag = (const sf32_Layout*)tag.buf;
380 const uint8_t* values = sf32Tag->values;
381 for (int r = 0; r < 3; ++r)
382 for (int c = 0; c < 3; ++c, values += 4) {
383 m->vals[r][c] = read_big_fixed(values);
384 }
385 return true;
386 }
387
388 // XYZType is technically variable sized, holding N XYZ triples. However, the only valid uses of
389 // the type are for tags/data that store exactly one triple.
390 typedef struct {
391 uint8_t type [4];
392 uint8_t reserved [4];
393 uint8_t X [4];
394 uint8_t Y [4];
395 uint8_t Z [4];
396 } XYZ_Layout;
397
read_tag_xyz(const skcms_ICCTag * tag,float * x,float * y,float * z)398 static bool read_tag_xyz(const skcms_ICCTag* tag, float* x, float* y, float* z) {
399 if (tag->type != skcms_Signature_XYZ || tag->size < SAFE_SIZEOF(XYZ_Layout)) {
400 return false;
401 }
402
403 const XYZ_Layout* xyzTag = (const XYZ_Layout*)tag->buf;
404
405 *x = read_big_fixed(xyzTag->X);
406 *y = read_big_fixed(xyzTag->Y);
407 *z = read_big_fixed(xyzTag->Z);
408 return true;
409 }
410
skcms_GetWTPT(const skcms_ICCProfile * profile,float xyz[3])411 bool skcms_GetWTPT(const skcms_ICCProfile* profile, float xyz[3]) {
412 skcms_ICCTag tag;
413 return skcms_GetTagBySignature(profile, skcms_Signature_WTPT, &tag) &&
414 read_tag_xyz(&tag, &xyz[0], &xyz[1], &xyz[2]);
415 }
416
read_to_XYZD50(const skcms_ICCTag * rXYZ,const skcms_ICCTag * gXYZ,const skcms_ICCTag * bXYZ,skcms_Matrix3x3 * toXYZ)417 static bool read_to_XYZD50(const skcms_ICCTag* rXYZ, const skcms_ICCTag* gXYZ,
418 const skcms_ICCTag* bXYZ, skcms_Matrix3x3* toXYZ) {
419 return read_tag_xyz(rXYZ, &toXYZ->vals[0][0], &toXYZ->vals[1][0], &toXYZ->vals[2][0]) &&
420 read_tag_xyz(gXYZ, &toXYZ->vals[0][1], &toXYZ->vals[1][1], &toXYZ->vals[2][1]) &&
421 read_tag_xyz(bXYZ, &toXYZ->vals[0][2], &toXYZ->vals[1][2], &toXYZ->vals[2][2]);
422 }
423
424 typedef struct {
425 uint8_t type [4];
426 uint8_t reserved_a [4];
427 uint8_t function_type [2];
428 uint8_t reserved_b [2];
429 uint8_t variable [1/*variable*/]; // 1, 3, 4, 5, or 7 s15.16, depending on function_type
430 } para_Layout;
431
read_curve_para(const uint8_t * buf,uint32_t size,skcms_Curve * curve,uint32_t * curve_size)432 static bool read_curve_para(const uint8_t* buf, uint32_t size,
433 skcms_Curve* curve, uint32_t* curve_size) {
434 if (size < SAFE_FIXED_SIZE(para_Layout)) {
435 return false;
436 }
437
438 const para_Layout* paraTag = (const para_Layout*)buf;
439
440 enum { kG = 0, kGAB = 1, kGABC = 2, kGABCD = 3, kGABCDEF = 4 };
441 uint16_t function_type = read_big_u16(paraTag->function_type);
442 if (function_type > kGABCDEF) {
443 return false;
444 }
445
446 static const uint32_t curve_bytes[] = { 4, 12, 16, 20, 28 };
447 if (size < SAFE_FIXED_SIZE(para_Layout) + curve_bytes[function_type]) {
448 return false;
449 }
450
451 if (curve_size) {
452 *curve_size = SAFE_FIXED_SIZE(para_Layout) + curve_bytes[function_type];
453 }
454
455 curve->table_entries = 0;
456 curve->parametric.a = 1.0f;
457 curve->parametric.b = 0.0f;
458 curve->parametric.c = 0.0f;
459 curve->parametric.d = 0.0f;
460 curve->parametric.e = 0.0f;
461 curve->parametric.f = 0.0f;
462 curve->parametric.g = read_big_fixed(paraTag->variable);
463
464 switch (function_type) {
465 case kGAB:
466 curve->parametric.a = read_big_fixed(paraTag->variable + 4);
467 curve->parametric.b = read_big_fixed(paraTag->variable + 8);
468 if (curve->parametric.a == 0) {
469 return false;
470 }
471 curve->parametric.d = -curve->parametric.b / curve->parametric.a;
472 break;
473 case kGABC:
474 curve->parametric.a = read_big_fixed(paraTag->variable + 4);
475 curve->parametric.b = read_big_fixed(paraTag->variable + 8);
476 curve->parametric.e = read_big_fixed(paraTag->variable + 12);
477 if (curve->parametric.a == 0) {
478 return false;
479 }
480 curve->parametric.d = -curve->parametric.b / curve->parametric.a;
481 curve->parametric.f = curve->parametric.e;
482 break;
483 case kGABCD:
484 curve->parametric.a = read_big_fixed(paraTag->variable + 4);
485 curve->parametric.b = read_big_fixed(paraTag->variable + 8);
486 curve->parametric.c = read_big_fixed(paraTag->variable + 12);
487 curve->parametric.d = read_big_fixed(paraTag->variable + 16);
488 break;
489 case kGABCDEF:
490 curve->parametric.a = read_big_fixed(paraTag->variable + 4);
491 curve->parametric.b = read_big_fixed(paraTag->variable + 8);
492 curve->parametric.c = read_big_fixed(paraTag->variable + 12);
493 curve->parametric.d = read_big_fixed(paraTag->variable + 16);
494 curve->parametric.e = read_big_fixed(paraTag->variable + 20);
495 curve->parametric.f = read_big_fixed(paraTag->variable + 24);
496 break;
497 }
498 return skcms_TransferFunction_isSRGBish(&curve->parametric);
499 }
500
501 typedef struct {
502 uint8_t type [4];
503 uint8_t reserved [4];
504 uint8_t value_count [4];
505 uint8_t variable [1/*variable*/]; // value_count, 8.8 if 1, uint16 (n*65535) if > 1
506 } curv_Layout;
507
read_curve_curv(const uint8_t * buf,uint32_t size,skcms_Curve * curve,uint32_t * curve_size)508 static bool read_curve_curv(const uint8_t* buf, uint32_t size,
509 skcms_Curve* curve, uint32_t* curve_size) {
510 if (size < SAFE_FIXED_SIZE(curv_Layout)) {
511 return false;
512 }
513
514 const curv_Layout* curvTag = (const curv_Layout*)buf;
515
516 uint32_t value_count = read_big_u32(curvTag->value_count);
517 if (size < SAFE_FIXED_SIZE(curv_Layout) + value_count * SAFE_SIZEOF(uint16_t)) {
518 return false;
519 }
520
521 if (curve_size) {
522 *curve_size = SAFE_FIXED_SIZE(curv_Layout) + value_count * SAFE_SIZEOF(uint16_t);
523 }
524
525 if (value_count < 2) {
526 curve->table_entries = 0;
527 curve->parametric.a = 1.0f;
528 curve->parametric.b = 0.0f;
529 curve->parametric.c = 0.0f;
530 curve->parametric.d = 0.0f;
531 curve->parametric.e = 0.0f;
532 curve->parametric.f = 0.0f;
533 if (value_count == 0) {
534 // Empty tables are a shorthand for an identity curve
535 curve->parametric.g = 1.0f;
536 } else {
537 // Single entry tables are a shorthand for simple gamma
538 curve->parametric.g = read_big_u16(curvTag->variable) * (1.0f / 256.0f);
539 }
540 } else {
541 curve->table_8 = nullptr;
542 curve->table_16 = curvTag->variable;
543 curve->table_entries = value_count;
544 }
545
546 return true;
547 }
548
549 // Parses both curveType and parametricCurveType data. Ensures that at most 'size' bytes are read.
550 // If curve_size is not nullptr, writes the number of bytes used by the curve in (*curve_size).
read_curve(const uint8_t * buf,uint32_t size,skcms_Curve * curve,uint32_t * curve_size)551 static bool read_curve(const uint8_t* buf, uint32_t size,
552 skcms_Curve* curve, uint32_t* curve_size) {
553 if (!buf || size < 4 || !curve) {
554 return false;
555 }
556
557 uint32_t type = read_big_u32(buf);
558 if (type == skcms_Signature_para) {
559 return read_curve_para(buf, size, curve, curve_size);
560 } else if (type == skcms_Signature_curv) {
561 return read_curve_curv(buf, size, curve, curve_size);
562 }
563
564 return false;
565 }
566
567 // mft1 and mft2 share a large chunk of data
568 typedef struct {
569 uint8_t type [ 4];
570 uint8_t reserved_a [ 4];
571 uint8_t input_channels [ 1];
572 uint8_t output_channels [ 1];
573 uint8_t grid_points [ 1];
574 uint8_t reserved_b [ 1];
575 uint8_t matrix [36];
576 } mft_CommonLayout;
577
578 typedef struct {
579 mft_CommonLayout common [1];
580
581 uint8_t variable [1/*variable*/];
582 } mft1_Layout;
583
584 typedef struct {
585 mft_CommonLayout common [1];
586
587 uint8_t input_table_entries [2];
588 uint8_t output_table_entries [2];
589 uint8_t variable [1/*variable*/];
590 } mft2_Layout;
591
read_mft_common(const mft_CommonLayout * mftTag,skcms_A2B * a2b)592 static bool read_mft_common(const mft_CommonLayout* mftTag, skcms_A2B* a2b) {
593 // MFT matrices are applied before the first set of curves, but must be identity unless the
594 // input is PCSXYZ. We don't support PCSXYZ profiles, so we ignore this matrix. Note that the
595 // matrix in skcms_A2B is applied later in the pipe, so supporting this would require another
596 // field/flag.
597 a2b->matrix_channels = 0;
598
599 a2b->input_channels = mftTag->input_channels[0];
600 a2b->output_channels = mftTag->output_channels[0];
601
602 // We require exactly three (ie XYZ/Lab/RGB) output channels
603 if (a2b->output_channels != ARRAY_COUNT(a2b->output_curves)) {
604 return false;
605 }
606 // We require at least one, and no more than four (ie CMYK) input channels
607 if (a2b->input_channels < 1 || a2b->input_channels > ARRAY_COUNT(a2b->input_curves)) {
608 return false;
609 }
610
611 for (uint32_t i = 0; i < a2b->input_channels; ++i) {
612 a2b->grid_points[i] = mftTag->grid_points[0];
613 }
614 // The grid only makes sense with at least two points along each axis
615 if (a2b->grid_points[0] < 2) {
616 return false;
617 }
618
619 return true;
620 }
621
init_a2b_tables(const uint8_t * table_base,uint64_t max_tables_len,uint32_t byte_width,uint32_t input_table_entries,uint32_t output_table_entries,skcms_A2B * a2b)622 static bool init_a2b_tables(const uint8_t* table_base, uint64_t max_tables_len, uint32_t byte_width,
623 uint32_t input_table_entries, uint32_t output_table_entries,
624 skcms_A2B* a2b) {
625 // byte_width is 1 or 2, [input|output]_table_entries are in [2, 4096], so no overflow
626 uint32_t byte_len_per_input_table = input_table_entries * byte_width;
627 uint32_t byte_len_per_output_table = output_table_entries * byte_width;
628
629 // [input|output]_channels are <= 4, so still no overflow
630 uint32_t byte_len_all_input_tables = a2b->input_channels * byte_len_per_input_table;
631 uint32_t byte_len_all_output_tables = a2b->output_channels * byte_len_per_output_table;
632
633 uint64_t grid_size = a2b->output_channels * byte_width;
634 for (uint32_t axis = 0; axis < a2b->input_channels; ++axis) {
635 grid_size *= a2b->grid_points[axis];
636 }
637
638 if (max_tables_len < byte_len_all_input_tables + grid_size + byte_len_all_output_tables) {
639 return false;
640 }
641
642 for (uint32_t i = 0; i < a2b->input_channels; ++i) {
643 a2b->input_curves[i].table_entries = input_table_entries;
644 if (byte_width == 1) {
645 a2b->input_curves[i].table_8 = table_base + i * byte_len_per_input_table;
646 a2b->input_curves[i].table_16 = nullptr;
647 } else {
648 a2b->input_curves[i].table_8 = nullptr;
649 a2b->input_curves[i].table_16 = table_base + i * byte_len_per_input_table;
650 }
651 }
652
653 if (byte_width == 1) {
654 a2b->grid_8 = table_base + byte_len_all_input_tables;
655 a2b->grid_16 = nullptr;
656 } else {
657 a2b->grid_8 = nullptr;
658 a2b->grid_16 = table_base + byte_len_all_input_tables;
659 }
660
661 const uint8_t* output_table_base = table_base + byte_len_all_input_tables + grid_size;
662 for (uint32_t i = 0; i < a2b->output_channels; ++i) {
663 a2b->output_curves[i].table_entries = output_table_entries;
664 if (byte_width == 1) {
665 a2b->output_curves[i].table_8 = output_table_base + i * byte_len_per_output_table;
666 a2b->output_curves[i].table_16 = nullptr;
667 } else {
668 a2b->output_curves[i].table_8 = nullptr;
669 a2b->output_curves[i].table_16 = output_table_base + i * byte_len_per_output_table;
670 }
671 }
672
673 return true;
674 }
675
read_tag_mft1(const skcms_ICCTag * tag,skcms_A2B * a2b)676 static bool read_tag_mft1(const skcms_ICCTag* tag, skcms_A2B* a2b) {
677 if (tag->size < SAFE_FIXED_SIZE(mft1_Layout)) {
678 return false;
679 }
680
681 const mft1_Layout* mftTag = (const mft1_Layout*)tag->buf;
682 if (!read_mft_common(mftTag->common, a2b)) {
683 return false;
684 }
685
686 uint32_t input_table_entries = 256;
687 uint32_t output_table_entries = 256;
688
689 return init_a2b_tables(mftTag->variable, tag->size - SAFE_FIXED_SIZE(mft1_Layout), 1,
690 input_table_entries, output_table_entries, a2b);
691 }
692
read_tag_mft2(const skcms_ICCTag * tag,skcms_A2B * a2b)693 static bool read_tag_mft2(const skcms_ICCTag* tag, skcms_A2B* a2b) {
694 if (tag->size < SAFE_FIXED_SIZE(mft2_Layout)) {
695 return false;
696 }
697
698 const mft2_Layout* mftTag = (const mft2_Layout*)tag->buf;
699 if (!read_mft_common(mftTag->common, a2b)) {
700 return false;
701 }
702
703 uint32_t input_table_entries = read_big_u16(mftTag->input_table_entries);
704 uint32_t output_table_entries = read_big_u16(mftTag->output_table_entries);
705
706 // ICC spec mandates that 2 <= table_entries <= 4096
707 if (input_table_entries < 2 || input_table_entries > 4096 ||
708 output_table_entries < 2 || output_table_entries > 4096) {
709 return false;
710 }
711
712 return init_a2b_tables(mftTag->variable, tag->size - SAFE_FIXED_SIZE(mft2_Layout), 2,
713 input_table_entries, output_table_entries, a2b);
714 }
715
read_curves(const uint8_t * buf,uint32_t size,uint32_t curve_offset,uint32_t num_curves,skcms_Curve * curves)716 static bool read_curves(const uint8_t* buf, uint32_t size, uint32_t curve_offset,
717 uint32_t num_curves, skcms_Curve* curves) {
718 for (uint32_t i = 0; i < num_curves; ++i) {
719 if (curve_offset > size) {
720 return false;
721 }
722
723 uint32_t curve_bytes;
724 if (!read_curve(buf + curve_offset, size - curve_offset, &curves[i], &curve_bytes)) {
725 return false;
726 }
727
728 if (curve_bytes > UINT32_MAX - 3) {
729 return false;
730 }
731 curve_bytes = (curve_bytes + 3) & ~3U;
732
733 uint64_t new_offset_64 = (uint64_t)curve_offset + curve_bytes;
734 curve_offset = (uint32_t)new_offset_64;
735 if (new_offset_64 != curve_offset) {
736 return false;
737 }
738 }
739
740 return true;
741 }
742
743 typedef struct {
744 uint8_t type [ 4];
745 uint8_t reserved_a [ 4];
746 uint8_t input_channels [ 1];
747 uint8_t output_channels [ 1];
748 uint8_t reserved_b [ 2];
749 uint8_t b_curve_offset [ 4];
750 uint8_t matrix_offset [ 4];
751 uint8_t m_curve_offset [ 4];
752 uint8_t clut_offset [ 4];
753 uint8_t a_curve_offset [ 4];
754 } mAB_Layout;
755
756 typedef struct {
757 uint8_t grid_points [16];
758 uint8_t grid_byte_width [ 1];
759 uint8_t reserved [ 3];
760 uint8_t variable [1/*variable*/];
761 } mABCLUT_Layout;
762
read_tag_mab(const skcms_ICCTag * tag,skcms_A2B * a2b,bool pcs_is_xyz)763 static bool read_tag_mab(const skcms_ICCTag* tag, skcms_A2B* a2b, bool pcs_is_xyz) {
764 if (tag->size < SAFE_SIZEOF(mAB_Layout)) {
765 return false;
766 }
767
768 const mAB_Layout* mABTag = (const mAB_Layout*)tag->buf;
769
770 a2b->input_channels = mABTag->input_channels[0];
771 a2b->output_channels = mABTag->output_channels[0];
772
773 // We require exactly three (ie XYZ/Lab/RGB) output channels
774 if (a2b->output_channels != ARRAY_COUNT(a2b->output_curves)) {
775 return false;
776 }
777 // We require no more than four (ie CMYK) input channels
778 if (a2b->input_channels > ARRAY_COUNT(a2b->input_curves)) {
779 return false;
780 }
781
782 uint32_t b_curve_offset = read_big_u32(mABTag->b_curve_offset);
783 uint32_t matrix_offset = read_big_u32(mABTag->matrix_offset);
784 uint32_t m_curve_offset = read_big_u32(mABTag->m_curve_offset);
785 uint32_t clut_offset = read_big_u32(mABTag->clut_offset);
786 uint32_t a_curve_offset = read_big_u32(mABTag->a_curve_offset);
787
788 // "B" curves must be present
789 if (0 == b_curve_offset) {
790 return false;
791 }
792
793 if (!read_curves(tag->buf, tag->size, b_curve_offset, a2b->output_channels,
794 a2b->output_curves)) {
795 return false;
796 }
797
798 // "M" curves and Matrix must be used together
799 if (0 != m_curve_offset) {
800 if (0 == matrix_offset) {
801 return false;
802 }
803 a2b->matrix_channels = a2b->output_channels;
804 if (!read_curves(tag->buf, tag->size, m_curve_offset, a2b->matrix_channels,
805 a2b->matrix_curves)) {
806 return false;
807 }
808
809 // Read matrix, which is stored as a row-major 3x3, followed by the fourth column
810 if (tag->size < matrix_offset + 12 * SAFE_SIZEOF(uint32_t)) {
811 return false;
812 }
813 float encoding_factor = pcs_is_xyz ? 65535 / 32768.0f : 1.0f;
814 const uint8_t* mtx_buf = tag->buf + matrix_offset;
815 a2b->matrix.vals[0][0] = encoding_factor * read_big_fixed(mtx_buf + 0);
816 a2b->matrix.vals[0][1] = encoding_factor * read_big_fixed(mtx_buf + 4);
817 a2b->matrix.vals[0][2] = encoding_factor * read_big_fixed(mtx_buf + 8);
818 a2b->matrix.vals[1][0] = encoding_factor * read_big_fixed(mtx_buf + 12);
819 a2b->matrix.vals[1][1] = encoding_factor * read_big_fixed(mtx_buf + 16);
820 a2b->matrix.vals[1][2] = encoding_factor * read_big_fixed(mtx_buf + 20);
821 a2b->matrix.vals[2][0] = encoding_factor * read_big_fixed(mtx_buf + 24);
822 a2b->matrix.vals[2][1] = encoding_factor * read_big_fixed(mtx_buf + 28);
823 a2b->matrix.vals[2][2] = encoding_factor * read_big_fixed(mtx_buf + 32);
824 a2b->matrix.vals[0][3] = encoding_factor * read_big_fixed(mtx_buf + 36);
825 a2b->matrix.vals[1][3] = encoding_factor * read_big_fixed(mtx_buf + 40);
826 a2b->matrix.vals[2][3] = encoding_factor * read_big_fixed(mtx_buf + 44);
827 } else {
828 if (0 != matrix_offset) {
829 return false;
830 }
831 a2b->matrix_channels = 0;
832 }
833
834 // "A" curves and CLUT must be used together
835 if (0 != a_curve_offset) {
836 if (0 == clut_offset) {
837 return false;
838 }
839 if (!read_curves(tag->buf, tag->size, a_curve_offset, a2b->input_channels,
840 a2b->input_curves)) {
841 return false;
842 }
843
844 if (tag->size < clut_offset + SAFE_FIXED_SIZE(mABCLUT_Layout)) {
845 return false;
846 }
847 const mABCLUT_Layout* clut = (const mABCLUT_Layout*)(tag->buf + clut_offset);
848
849 if (clut->grid_byte_width[0] == 1) {
850 a2b->grid_8 = clut->variable;
851 a2b->grid_16 = nullptr;
852 } else if (clut->grid_byte_width[0] == 2) {
853 a2b->grid_8 = nullptr;
854 a2b->grid_16 = clut->variable;
855 } else {
856 return false;
857 }
858
859 uint64_t grid_size = a2b->output_channels * clut->grid_byte_width[0];
860 for (uint32_t i = 0; i < a2b->input_channels; ++i) {
861 a2b->grid_points[i] = clut->grid_points[i];
862 // The grid only makes sense with at least two points along each axis
863 if (a2b->grid_points[i] < 2) {
864 return false;
865 }
866 grid_size *= a2b->grid_points[i];
867 }
868 if (tag->size < clut_offset + SAFE_FIXED_SIZE(mABCLUT_Layout) + grid_size) {
869 return false;
870 }
871 } else {
872 if (0 != clut_offset) {
873 return false;
874 }
875
876 // If there is no CLUT, the number of input and output channels must match
877 if (a2b->input_channels != a2b->output_channels) {
878 return false;
879 }
880
881 // Zero out the number of input channels to signal that we're skipping this stage
882 a2b->input_channels = 0;
883 }
884
885 return true;
886 }
887
888 // If you pass f, we'll fit a possibly-non-zero value for *f.
889 // If you pass nullptr, we'll assume you want *f to be treated as zero.
fit_linear(const skcms_Curve * curve,int N,float tol,float * c,float * d,float * f=nullptr)890 static int fit_linear(const skcms_Curve* curve, int N, float tol,
891 float* c, float* d, float* f = nullptr) {
892 assert(N > 1);
893 // We iteratively fit the first points to the TF's linear piece.
894 // We want the cx + f line to pass through the first and last points we fit exactly.
895 //
896 // As we walk along the points we find the minimum and maximum slope of the line before the
897 // error would exceed our tolerance. We stop when the range [slope_min, slope_max] becomes
898 // emtpy, when we definitely can't add any more points.
899 //
900 // Some points' error intervals may intersect the running interval but not lie fully
901 // within it. So we keep track of the last point we saw that is a valid end point candidate,
902 // and once the search is done, back up to build the line through *that* point.
903 const float dx = 1.0f / (N - 1);
904
905 int lin_points = 1;
906
907 float f_zero = 0.0f;
908 if (f) {
909 *f = eval_curve(curve, 0);
910 } else {
911 f = &f_zero;
912 }
913
914
915 float slope_min = -INFINITY_;
916 float slope_max = +INFINITY_;
917 for (int i = 1; i < N; ++i) {
918 float x = i * dx;
919 float y = eval_curve(curve, x);
920
921 float slope_max_i = (y + tol - *f) / x,
922 slope_min_i = (y - tol - *f) / x;
923 if (slope_max_i < slope_min || slope_max < slope_min_i) {
924 // Slope intervals would no longer overlap.
925 break;
926 }
927 slope_max = fminf_(slope_max, slope_max_i);
928 slope_min = fmaxf_(slope_min, slope_min_i);
929
930 float cur_slope = (y - *f) / x;
931 if (slope_min <= cur_slope && cur_slope <= slope_max) {
932 lin_points = i + 1;
933 *c = cur_slope;
934 }
935 }
936
937 // Set D to the last point that met our tolerance.
938 *d = (lin_points - 1) * dx;
939 return lin_points;
940 }
941
read_a2b(const skcms_ICCTag * tag,skcms_A2B * a2b,bool pcs_is_xyz)942 static bool read_a2b(const skcms_ICCTag* tag, skcms_A2B* a2b, bool pcs_is_xyz) {
943 bool ok = false;
944 if (tag->type == skcms_Signature_mft1) {
945 ok = read_tag_mft1(tag, a2b);
946 } else if (tag->type == skcms_Signature_mft2) {
947 ok = read_tag_mft2(tag, a2b);
948 } else if (tag->type == skcms_Signature_mAB) {
949 ok = read_tag_mab(tag, a2b, pcs_is_xyz);
950 }
951 if (!ok) {
952 return false;
953 }
954
955 // Detect and canonicalize identity tables.
956 skcms_Curve* curves[] = {
957 a2b->input_channels > 0 ? a2b->input_curves + 0 : nullptr,
958 a2b->input_channels > 1 ? a2b->input_curves + 1 : nullptr,
959 a2b->input_channels > 2 ? a2b->input_curves + 2 : nullptr,
960 a2b->input_channels > 3 ? a2b->input_curves + 3 : nullptr,
961 a2b->matrix_channels > 0 ? a2b->matrix_curves + 0 : nullptr,
962 a2b->matrix_channels > 1 ? a2b->matrix_curves + 1 : nullptr,
963 a2b->matrix_channels > 2 ? a2b->matrix_curves + 2 : nullptr,
964 a2b->output_channels > 0 ? a2b->output_curves + 0 : nullptr,
965 a2b->output_channels > 1 ? a2b->output_curves + 1 : nullptr,
966 a2b->output_channels > 2 ? a2b->output_curves + 2 : nullptr,
967 };
968
969 for (int i = 0; i < ARRAY_COUNT(curves); i++) {
970 skcms_Curve* curve = curves[i];
971
972 if (curve && curve->table_entries && curve->table_entries <= (uint32_t)INT_MAX) {
973 int N = (int)curve->table_entries;
974
975 float c = 0.0f, d = 0.0f, f = 0.0f;
976 if (N == fit_linear(curve, N, 1.0f/(2*N), &c,&d,&f)
977 && c == 1.0f
978 && f == 0.0f) {
979 curve->table_entries = 0;
980 curve->table_8 = nullptr;
981 curve->table_16 = nullptr;
982 curve->parametric = skcms_TransferFunction{1,1,0,0,0,0,0};
983 }
984 }
985 }
986
987 return true;
988 }
989
skcms_GetTagByIndex(const skcms_ICCProfile * profile,uint32_t idx,skcms_ICCTag * tag)990 void skcms_GetTagByIndex(const skcms_ICCProfile* profile, uint32_t idx, skcms_ICCTag* tag) {
991 if (!profile || !profile->buffer || !tag) { return; }
992 if (idx > profile->tag_count) { return; }
993 const tag_Layout* tags = get_tag_table(profile);
994 tag->signature = read_big_u32(tags[idx].signature);
995 tag->size = read_big_u32(tags[idx].size);
996 tag->buf = read_big_u32(tags[idx].offset) + profile->buffer;
997 tag->type = read_big_u32(tag->buf);
998 }
999
skcms_GetTagBySignature(const skcms_ICCProfile * profile,uint32_t sig,skcms_ICCTag * tag)1000 bool skcms_GetTagBySignature(const skcms_ICCProfile* profile, uint32_t sig, skcms_ICCTag* tag) {
1001 if (!profile || !profile->buffer || !tag) { return false; }
1002 const tag_Layout* tags = get_tag_table(profile);
1003 for (uint32_t i = 0; i < profile->tag_count; ++i) {
1004 if (read_big_u32(tags[i].signature) == sig) {
1005 tag->signature = sig;
1006 tag->size = read_big_u32(tags[i].size);
1007 tag->buf = read_big_u32(tags[i].offset) + profile->buffer;
1008 tag->type = read_big_u32(tag->buf);
1009 return true;
1010 }
1011 }
1012 return false;
1013 }
1014
usable_as_src(const skcms_ICCProfile * profile)1015 static bool usable_as_src(const skcms_ICCProfile* profile) {
1016 return profile->has_A2B
1017 || (profile->has_trc && profile->has_toXYZD50);
1018 }
1019
skcms_Parse(const void * buf,size_t len,skcms_ICCProfile * profile)1020 bool skcms_Parse(const void* buf, size_t len, skcms_ICCProfile* profile) {
1021 assert(SAFE_SIZEOF(header_Layout) == 132);
1022
1023 if (!profile) {
1024 return false;
1025 }
1026 memset(profile, 0, SAFE_SIZEOF(*profile));
1027
1028 if (len < SAFE_SIZEOF(header_Layout)) {
1029 return false;
1030 }
1031
1032 // Byte-swap all header fields
1033 const header_Layout* header = (const header_Layout*)buf;
1034 profile->buffer = (const uint8_t*)buf;
1035 profile->size = read_big_u32(header->size);
1036 uint32_t version = read_big_u32(header->version);
1037 profile->data_color_space = read_big_u32(header->data_color_space);
1038 profile->pcs = read_big_u32(header->pcs);
1039 uint32_t signature = read_big_u32(header->signature);
1040 float illuminant_X = read_big_fixed(header->illuminant_X);
1041 float illuminant_Y = read_big_fixed(header->illuminant_Y);
1042 float illuminant_Z = read_big_fixed(header->illuminant_Z);
1043 profile->tag_count = read_big_u32(header->tag_count);
1044
1045 // Validate signature, size (smaller than buffer, large enough to hold tag table),
1046 // and major version
1047 uint64_t tag_table_size = profile->tag_count * SAFE_SIZEOF(tag_Layout);
1048 if (signature != skcms_Signature_acsp ||
1049 profile->size > len ||
1050 profile->size < SAFE_SIZEOF(header_Layout) + tag_table_size ||
1051 (version >> 24) > 4) {
1052 return false;
1053 }
1054
1055 // Validate that illuminant is D50 white
1056 if (fabsf_(illuminant_X - 0.9642f) > 0.0100f ||
1057 fabsf_(illuminant_Y - 1.0000f) > 0.0100f ||
1058 fabsf_(illuminant_Z - 0.8249f) > 0.0100f) {
1059 return false;
1060 }
1061
1062 // Validate that all tag entries have sane offset + size
1063 const tag_Layout* tags = get_tag_table(profile);
1064 for (uint32_t i = 0; i < profile->tag_count; ++i) {
1065 uint32_t tag_offset = read_big_u32(tags[i].offset);
1066 uint32_t tag_size = read_big_u32(tags[i].size);
1067 uint64_t tag_end = (uint64_t)tag_offset + (uint64_t)tag_size;
1068 if (tag_size < 4 || tag_end > profile->size) {
1069 return false;
1070 }
1071 }
1072
1073 if (profile->pcs != skcms_Signature_XYZ && profile->pcs != skcms_Signature_Lab) {
1074 return false;
1075 }
1076
1077 bool pcs_is_xyz = profile->pcs == skcms_Signature_XYZ;
1078
1079 // Pre-parse commonly used tags.
1080 skcms_ICCTag kTRC;
1081 if (profile->data_color_space == skcms_Signature_Gray &&
1082 skcms_GetTagBySignature(profile, skcms_Signature_kTRC, &kTRC)) {
1083 if (!read_curve(kTRC.buf, kTRC.size, &profile->trc[0], nullptr)) {
1084 // Malformed tag
1085 return false;
1086 }
1087 profile->trc[1] = profile->trc[0];
1088 profile->trc[2] = profile->trc[0];
1089 profile->has_trc = true;
1090
1091 if (pcs_is_xyz) {
1092 profile->toXYZD50.vals[0][0] = illuminant_X;
1093 profile->toXYZD50.vals[1][1] = illuminant_Y;
1094 profile->toXYZD50.vals[2][2] = illuminant_Z;
1095 profile->has_toXYZD50 = true;
1096 }
1097 } else {
1098 skcms_ICCTag rTRC, gTRC, bTRC;
1099 if (skcms_GetTagBySignature(profile, skcms_Signature_rTRC, &rTRC) &&
1100 skcms_GetTagBySignature(profile, skcms_Signature_gTRC, &gTRC) &&
1101 skcms_GetTagBySignature(profile, skcms_Signature_bTRC, &bTRC)) {
1102 if (!read_curve(rTRC.buf, rTRC.size, &profile->trc[0], nullptr) ||
1103 !read_curve(gTRC.buf, gTRC.size, &profile->trc[1], nullptr) ||
1104 !read_curve(bTRC.buf, bTRC.size, &profile->trc[2], nullptr)) {
1105 // Malformed TRC tags
1106 return false;
1107 }
1108 profile->has_trc = true;
1109 }
1110
1111 skcms_ICCTag rXYZ, gXYZ, bXYZ;
1112 if (skcms_GetTagBySignature(profile, skcms_Signature_rXYZ, &rXYZ) &&
1113 skcms_GetTagBySignature(profile, skcms_Signature_gXYZ, &gXYZ) &&
1114 skcms_GetTagBySignature(profile, skcms_Signature_bXYZ, &bXYZ)) {
1115 if (!read_to_XYZD50(&rXYZ, &gXYZ, &bXYZ, &profile->toXYZD50)) {
1116 // Malformed XYZ tags
1117 return false;
1118 }
1119 profile->has_toXYZD50 = true;
1120 }
1121 }
1122
1123 skcms_ICCTag a2b_tag;
1124
1125 // For now, we're preferring A2B0, like Skia does and the ICC spec tells us to.
1126 // TODO: prefer A2B1 (relative colormetric) over A2B0 (perceptual)?
1127 // This breaks with the ICC spec, but we think it's a good idea, given that TRC curves
1128 // and all our known users are thinking exclusively in terms of relative colormetric.
1129 const uint32_t sigs[] = { skcms_Signature_A2B0, skcms_Signature_A2B1 };
1130 for (int i = 0; i < ARRAY_COUNT(sigs); i++) {
1131 if (skcms_GetTagBySignature(profile, sigs[i], &a2b_tag)) {
1132 if (!read_a2b(&a2b_tag, &profile->A2B, pcs_is_xyz)) {
1133 // Malformed A2B tag
1134 return false;
1135 }
1136 profile->has_A2B = true;
1137 break;
1138 }
1139 }
1140
1141 return usable_as_src(profile);
1142 }
1143
1144
skcms_sRGB_profile()1145 const skcms_ICCProfile* skcms_sRGB_profile() {
1146 static const skcms_ICCProfile sRGB_profile = {
1147 nullptr, // buffer, moot here
1148
1149 0, // size, moot here
1150 skcms_Signature_RGB, // data_color_space
1151 skcms_Signature_XYZ, // pcs
1152 0, // tag count, moot here
1153
1154 // We choose to represent sRGB with its canonical transfer function,
1155 // and with its canonical XYZD50 gamut matrix.
1156 true, // has_trc, followed by the 3 trc curves
1157 {
1158 {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
1159 {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
1160 {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
1161 },
1162
1163 true, // has_toXYZD50, followed by 3x3 toXYZD50 matrix
1164 {{
1165 { 0.436065674f, 0.385147095f, 0.143066406f },
1166 { 0.222488403f, 0.716873169f, 0.060607910f },
1167 { 0.013916016f, 0.097076416f, 0.714096069f },
1168 }},
1169
1170 false, // has_A2B, followed by a2b itself which we don't care about.
1171 {
1172 0,
1173 {
1174 {{0, {0,0, 0,0,0,0,0}}},
1175 {{0, {0,0, 0,0,0,0,0}}},
1176 {{0, {0,0, 0,0,0,0,0}}},
1177 {{0, {0,0, 0,0,0,0,0}}},
1178 },
1179 {0,0,0,0},
1180 nullptr,
1181 nullptr,
1182
1183 0,
1184 {
1185 {{0, {0,0, 0,0,0,0,0}}},
1186 {{0, {0,0, 0,0,0,0,0}}},
1187 {{0, {0,0, 0,0,0,0,0}}},
1188 },
1189 {{
1190 { 0,0,0,0 },
1191 { 0,0,0,0 },
1192 { 0,0,0,0 },
1193 }},
1194
1195 0,
1196 {
1197 {{0, {0,0, 0,0,0,0,0}}},
1198 {{0, {0,0, 0,0,0,0,0}}},
1199 {{0, {0,0, 0,0,0,0,0}}},
1200 },
1201 },
1202 };
1203 return &sRGB_profile;
1204 }
1205
skcms_XYZD50_profile()1206 const skcms_ICCProfile* skcms_XYZD50_profile() {
1207 // Just like sRGB above, but with identity transfer functions and toXYZD50 matrix.
1208 static const skcms_ICCProfile XYZD50_profile = {
1209 nullptr, // buffer, moot here
1210
1211 0, // size, moot here
1212 skcms_Signature_RGB, // data_color_space
1213 skcms_Signature_XYZ, // pcs
1214 0, // tag count, moot here
1215
1216 true, // has_trc, followed by the 3 trc curves
1217 {
1218 {{0, {1,1, 0,0,0,0,0}}},
1219 {{0, {1,1, 0,0,0,0,0}}},
1220 {{0, {1,1, 0,0,0,0,0}}},
1221 },
1222
1223 true, // has_toXYZD50, followed by 3x3 toXYZD50 matrix
1224 {{
1225 { 1,0,0 },
1226 { 0,1,0 },
1227 { 0,0,1 },
1228 }},
1229
1230 false, // has_A2B, followed by a2b itself which we don't care about.
1231 {
1232 0,
1233 {
1234 {{0, {0,0, 0,0,0,0,0}}},
1235 {{0, {0,0, 0,0,0,0,0}}},
1236 {{0, {0,0, 0,0,0,0,0}}},
1237 {{0, {0,0, 0,0,0,0,0}}},
1238 },
1239 {0,0,0,0},
1240 nullptr,
1241 nullptr,
1242
1243 0,
1244 {
1245 {{0, {0,0, 0,0,0,0,0}}},
1246 {{0, {0,0, 0,0,0,0,0}}},
1247 {{0, {0,0, 0,0,0,0,0}}},
1248 },
1249 {{
1250 { 0,0,0,0 },
1251 { 0,0,0,0 },
1252 { 0,0,0,0 },
1253 }},
1254
1255 0,
1256 {
1257 {{0, {0,0, 0,0,0,0,0}}},
1258 {{0, {0,0, 0,0,0,0,0}}},
1259 {{0, {0,0, 0,0,0,0,0}}},
1260 },
1261 },
1262 };
1263
1264 return &XYZD50_profile;
1265 }
1266
skcms_sRGB_TransferFunction()1267 const skcms_TransferFunction* skcms_sRGB_TransferFunction() {
1268 return &skcms_sRGB_profile()->trc[0].parametric;
1269 }
1270
skcms_sRGB_Inverse_TransferFunction()1271 const skcms_TransferFunction* skcms_sRGB_Inverse_TransferFunction() {
1272 static const skcms_TransferFunction sRGB_inv =
1273 {0.416666657f, 1.137283325f, -0.0f, 12.920000076f, 0.003130805f, -0.054969788f, -0.0f};
1274 return &sRGB_inv;
1275 }
1276
skcms_Identity_TransferFunction()1277 const skcms_TransferFunction* skcms_Identity_TransferFunction() {
1278 static const skcms_TransferFunction identity = {1,1,0,0,0,0,0};
1279 return &identity;
1280 }
1281
1282 const uint8_t skcms_252_random_bytes[] = {
1283 8, 179, 128, 204, 253, 38, 134, 184, 68, 102, 32, 138, 99, 39, 169, 215,
1284 119, 26, 3, 223, 95, 239, 52, 132, 114, 74, 81, 234, 97, 116, 244, 205, 30,
1285 154, 173, 12, 51, 159, 122, 153, 61, 226, 236, 178, 229, 55, 181, 220, 191,
1286 194, 160, 126, 168, 82, 131, 18, 180, 245, 163, 22, 246, 69, 235, 252, 57,
1287 108, 14, 6, 152, 240, 255, 171, 242, 20, 227, 177, 238, 96, 85, 16, 211,
1288 70, 200, 149, 155, 146, 127, 145, 100, 151, 109, 19, 165, 208, 195, 164,
1289 137, 254, 182, 248, 64, 201, 45, 209, 5, 147, 207, 210, 113, 162, 83, 225,
1290 9, 31, 15, 231, 115, 37, 58, 53, 24, 49, 197, 56, 120, 172, 48, 21, 214,
1291 129, 111, 11, 50, 187, 196, 34, 60, 103, 71, 144, 47, 203, 77, 80, 232,
1292 140, 222, 250, 206, 166, 247, 139, 249, 221, 72, 106, 27, 199, 117, 54,
1293 219, 135, 118, 40, 79, 41, 251, 46, 93, 212, 92, 233, 148, 28, 121, 63,
1294 123, 158, 105, 59, 29, 42, 143, 23, 0, 107, 176, 87, 104, 183, 156, 193,
1295 189, 90, 188, 65, 190, 17, 198, 7, 186, 161, 1, 124, 78, 125, 170, 133,
1296 174, 218, 67, 157, 75, 101, 89, 217, 62, 33, 141, 228, 25, 35, 91, 230, 4,
1297 2, 13, 73, 86, 167, 237, 84, 243, 44, 185, 66, 130, 110, 150, 142, 216, 88,
1298 112, 36, 224, 136, 202, 76, 94, 98, 175, 213
1299 };
1300
skcms_ApproximatelyEqualProfiles(const skcms_ICCProfile * A,const skcms_ICCProfile * B)1301 bool skcms_ApproximatelyEqualProfiles(const skcms_ICCProfile* A, const skcms_ICCProfile* B) {
1302 // Test for exactly equal profiles first.
1303 if (A == B || 0 == memcmp(A,B, sizeof(skcms_ICCProfile))) {
1304 return true;
1305 }
1306
1307 // For now this is the essentially the same strategy we use in test_only.c
1308 // for our skcms_Transform() smoke tests:
1309 // 1) transform A to XYZD50
1310 // 2) transform B to XYZD50
1311 // 3) return true if they're similar enough
1312 // Our current criterion in 3) is maximum 1 bit error per XYZD50 byte.
1313
1314 // skcms_252_random_bytes are 252 of a random shuffle of all possible bytes.
1315 // 252 is evenly divisible by 3 and 4. Only 192, 10, 241, and 43 are missing.
1316
1317 // We want to allow otherwise equivalent profiles tagged as grayscale and RGB
1318 // to be treated as equal. But CMYK profiles are a totally different ballgame.
1319 const auto CMYK = skcms_Signature_CMYK;
1320 if ((A->data_color_space == CMYK) != (B->data_color_space == CMYK)) {
1321 return false;
1322 }
1323
1324 // Interpret as RGB_888 if data color space is RGB or GRAY, RGBA_8888 if CMYK.
1325 // TODO: working with RGBA_8888 either way is probably fastest.
1326 skcms_PixelFormat fmt = skcms_PixelFormat_RGB_888;
1327 size_t npixels = 84;
1328 if (A->data_color_space == skcms_Signature_CMYK) {
1329 fmt = skcms_PixelFormat_RGBA_8888;
1330 npixels = 63;
1331 }
1332
1333 // TODO: if A or B is a known profile (skcms_sRGB_profile, skcms_XYZD50_profile),
1334 // use pre-canned results and skip that skcms_Transform() call?
1335 uint8_t dstA[252],
1336 dstB[252];
1337 if (!skcms_Transform(
1338 skcms_252_random_bytes, fmt, skcms_AlphaFormat_Unpremul, A,
1339 dstA, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, skcms_XYZD50_profile(),
1340 npixels)) {
1341 return false;
1342 }
1343 if (!skcms_Transform(
1344 skcms_252_random_bytes, fmt, skcms_AlphaFormat_Unpremul, B,
1345 dstB, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, skcms_XYZD50_profile(),
1346 npixels)) {
1347 return false;
1348 }
1349
1350 // TODO: make sure this final check has reasonable codegen.
1351 for (size_t i = 0; i < 252; i++) {
1352 if (abs((int)dstA[i] - (int)dstB[i]) > 1) {
1353 return false;
1354 }
1355 }
1356 return true;
1357 }
1358
skcms_TRCs_AreApproximateInverse(const skcms_ICCProfile * profile,const skcms_TransferFunction * inv_tf)1359 bool skcms_TRCs_AreApproximateInverse(const skcms_ICCProfile* profile,
1360 const skcms_TransferFunction* inv_tf) {
1361 if (!profile || !profile->has_trc) {
1362 return false;
1363 }
1364
1365 return skcms_AreApproximateInverses(&profile->trc[0], inv_tf) &&
1366 skcms_AreApproximateInverses(&profile->trc[1], inv_tf) &&
1367 skcms_AreApproximateInverses(&profile->trc[2], inv_tf);
1368 }
1369
is_zero_to_one(float x)1370 static bool is_zero_to_one(float x) {
1371 return 0 <= x && x <= 1;
1372 }
1373
1374 typedef struct { float vals[3]; } skcms_Vector3;
1375
mv_mul(const skcms_Matrix3x3 * m,const skcms_Vector3 * v)1376 static skcms_Vector3 mv_mul(const skcms_Matrix3x3* m, const skcms_Vector3* v) {
1377 skcms_Vector3 dst = {{0,0,0}};
1378 for (int row = 0; row < 3; ++row) {
1379 dst.vals[row] = m->vals[row][0] * v->vals[0]
1380 + m->vals[row][1] * v->vals[1]
1381 + m->vals[row][2] * v->vals[2];
1382 }
1383 return dst;
1384 }
1385
skcms_AdaptToXYZD50(float wx,float wy,skcms_Matrix3x3 * toXYZD50)1386 bool skcms_AdaptToXYZD50(float wx, float wy,
1387 skcms_Matrix3x3* toXYZD50) {
1388 if (!is_zero_to_one(wx) || !is_zero_to_one(wy) ||
1389 !toXYZD50) {
1390 return false;
1391 }
1392
1393 // Assumes that Y is 1.0f.
1394 skcms_Vector3 wXYZ = { { wx / wy, 1, (1 - wx - wy) / wy } };
1395
1396 // Now convert toXYZ matrix to toXYZD50.
1397 skcms_Vector3 wXYZD50 = { { 0.96422f, 1.0f, 0.82521f } };
1398
1399 // Calculate the chromatic adaptation matrix. We will use the Bradford method, thus
1400 // the matrices below. The Bradford method is used by Adobe and is widely considered
1401 // to be the best.
1402 skcms_Matrix3x3 xyz_to_lms = {{
1403 { 0.8951f, 0.2664f, -0.1614f },
1404 { -0.7502f, 1.7135f, 0.0367f },
1405 { 0.0389f, -0.0685f, 1.0296f },
1406 }};
1407 skcms_Matrix3x3 lms_to_xyz = {{
1408 { 0.9869929f, -0.1470543f, 0.1599627f },
1409 { 0.4323053f, 0.5183603f, 0.0492912f },
1410 { -0.0085287f, 0.0400428f, 0.9684867f },
1411 }};
1412
1413 skcms_Vector3 srcCone = mv_mul(&xyz_to_lms, &wXYZ);
1414 skcms_Vector3 dstCone = mv_mul(&xyz_to_lms, &wXYZD50);
1415
1416 *toXYZD50 = {{
1417 { dstCone.vals[0] / srcCone.vals[0], 0, 0 },
1418 { 0, dstCone.vals[1] / srcCone.vals[1], 0 },
1419 { 0, 0, dstCone.vals[2] / srcCone.vals[2] },
1420 }};
1421 *toXYZD50 = skcms_Matrix3x3_concat(toXYZD50, &xyz_to_lms);
1422 *toXYZD50 = skcms_Matrix3x3_concat(&lms_to_xyz, toXYZD50);
1423
1424 return true;
1425 }
1426
skcms_PrimariesToXYZD50(float rx,float ry,float gx,float gy,float bx,float by,float wx,float wy,skcms_Matrix3x3 * toXYZD50)1427 bool skcms_PrimariesToXYZD50(float rx, float ry,
1428 float gx, float gy,
1429 float bx, float by,
1430 float wx, float wy,
1431 skcms_Matrix3x3* toXYZD50) {
1432 if (!is_zero_to_one(rx) || !is_zero_to_one(ry) ||
1433 !is_zero_to_one(gx) || !is_zero_to_one(gy) ||
1434 !is_zero_to_one(bx) || !is_zero_to_one(by) ||
1435 !is_zero_to_one(wx) || !is_zero_to_one(wy) ||
1436 !toXYZD50) {
1437 return false;
1438 }
1439
1440 // First, we need to convert xy values (primaries) to XYZ.
1441 skcms_Matrix3x3 primaries = {{
1442 { rx, gx, bx },
1443 { ry, gy, by },
1444 { 1 - rx - ry, 1 - gx - gy, 1 - bx - by },
1445 }};
1446 skcms_Matrix3x3 primaries_inv;
1447 if (!skcms_Matrix3x3_invert(&primaries, &primaries_inv)) {
1448 return false;
1449 }
1450
1451 // Assumes that Y is 1.0f.
1452 skcms_Vector3 wXYZ = { { wx / wy, 1, (1 - wx - wy) / wy } };
1453 skcms_Vector3 XYZ = mv_mul(&primaries_inv, &wXYZ);
1454
1455 skcms_Matrix3x3 toXYZ = {{
1456 { XYZ.vals[0], 0, 0 },
1457 { 0, XYZ.vals[1], 0 },
1458 { 0, 0, XYZ.vals[2] },
1459 }};
1460 toXYZ = skcms_Matrix3x3_concat(&primaries, &toXYZ);
1461
1462 skcms_Matrix3x3 DXtoD50;
1463 if (!skcms_AdaptToXYZD50(wx, wy, &DXtoD50)) {
1464 return false;
1465 }
1466
1467 *toXYZD50 = skcms_Matrix3x3_concat(&DXtoD50, &toXYZ);
1468 return true;
1469 }
1470
1471
skcms_Matrix3x3_invert(const skcms_Matrix3x3 * src,skcms_Matrix3x3 * dst)1472 bool skcms_Matrix3x3_invert(const skcms_Matrix3x3* src, skcms_Matrix3x3* dst) {
1473 double a00 = src->vals[0][0],
1474 a01 = src->vals[1][0],
1475 a02 = src->vals[2][0],
1476 a10 = src->vals[0][1],
1477 a11 = src->vals[1][1],
1478 a12 = src->vals[2][1],
1479 a20 = src->vals[0][2],
1480 a21 = src->vals[1][2],
1481 a22 = src->vals[2][2];
1482
1483 double b0 = a00*a11 - a01*a10,
1484 b1 = a00*a12 - a02*a10,
1485 b2 = a01*a12 - a02*a11,
1486 b3 = a20,
1487 b4 = a21,
1488 b5 = a22;
1489
1490 double determinant = b0*b5
1491 - b1*b4
1492 + b2*b3;
1493
1494 if (determinant == 0) {
1495 return false;
1496 }
1497
1498 double invdet = 1.0 / determinant;
1499 if (invdet > +FLT_MAX || invdet < -FLT_MAX || !isfinitef_((float)invdet)) {
1500 return false;
1501 }
1502
1503 b0 *= invdet;
1504 b1 *= invdet;
1505 b2 *= invdet;
1506 b3 *= invdet;
1507 b4 *= invdet;
1508 b5 *= invdet;
1509
1510 dst->vals[0][0] = (float)( a11*b5 - a12*b4 );
1511 dst->vals[1][0] = (float)( a02*b4 - a01*b5 );
1512 dst->vals[2][0] = (float)( + b2 );
1513 dst->vals[0][1] = (float)( a12*b3 - a10*b5 );
1514 dst->vals[1][1] = (float)( a00*b5 - a02*b3 );
1515 dst->vals[2][1] = (float)( - b1 );
1516 dst->vals[0][2] = (float)( a10*b4 - a11*b3 );
1517 dst->vals[1][2] = (float)( a01*b3 - a00*b4 );
1518 dst->vals[2][2] = (float)( + b0 );
1519
1520 for (int r = 0; r < 3; ++r)
1521 for (int c = 0; c < 3; ++c) {
1522 if (!isfinitef_(dst->vals[r][c])) {
1523 return false;
1524 }
1525 }
1526 return true;
1527 }
1528
skcms_Matrix3x3_concat(const skcms_Matrix3x3 * A,const skcms_Matrix3x3 * B)1529 skcms_Matrix3x3 skcms_Matrix3x3_concat(const skcms_Matrix3x3* A, const skcms_Matrix3x3* B) {
1530 skcms_Matrix3x3 m = { { { 0,0,0 },{ 0,0,0 },{ 0,0,0 } } };
1531 for (int r = 0; r < 3; r++)
1532 for (int c = 0; c < 3; c++) {
1533 m.vals[r][c] = A->vals[r][0] * B->vals[0][c]
1534 + A->vals[r][1] * B->vals[1][c]
1535 + A->vals[r][2] * B->vals[2][c];
1536 }
1537 return m;
1538 }
1539
1540 #if defined(__clang__)
1541 [[clang::no_sanitize("float-divide-by-zero")]] // Checked for by classify() on the way out.
1542 #endif
skcms_TransferFunction_invert(const skcms_TransferFunction * src,skcms_TransferFunction * dst)1543 bool skcms_TransferFunction_invert(const skcms_TransferFunction* src, skcms_TransferFunction* dst) {
1544 TF_PQish pq;
1545 TF_HLGish hlg;
1546 switch (classify(*src, &pq, &hlg)) {
1547 case Bad: return false;
1548 case sRGBish: break; // handled below
1549
1550 case PQish:
1551 *dst = { TFKind_marker(PQish), -pq.A, pq.D, 1.0f/pq.F
1552 , pq.B, -pq.E, 1.0f/pq.C};
1553 return true;
1554
1555 case HLGish:
1556 *dst = { TFKind_marker(HLGinvish), 1.0f/hlg.R, 1.0f/hlg.G
1557 , 1.0f/hlg.a, hlg.b, hlg.c, 0 };
1558 return true;
1559
1560 case HLGinvish:
1561 *dst = { TFKind_marker(HLGish), 1.0f/hlg.R, 1.0f/hlg.G
1562 , 1.0f/hlg.a, hlg.b, hlg.c, 0 };
1563 return true;
1564 }
1565
1566 assert (classify(*src) == sRGBish);
1567
1568 // We're inverting this function, solving for x in terms of y.
1569 // y = (cx + f) x < d
1570 // (ax + b)^g + e x ≥ d
1571 // The inverse of this function can be expressed in the same piecewise form.
1572 skcms_TransferFunction inv = {0,0,0,0,0,0,0};
1573
1574 // We'll start by finding the new threshold inv.d.
1575 // In principle we should be able to find that by solving for y at x=d from either side.
1576 // (If those two d values aren't the same, it's a discontinuous transfer function.)
1577 float d_l = src->c * src->d + src->f,
1578 d_r = powf_(src->a * src->d + src->b, src->g) + src->e;
1579 if (fabsf_(d_l - d_r) > 1/512.0f) {
1580 return false;
1581 }
1582 inv.d = d_l; // TODO(mtklein): better in practice to choose d_r?
1583
1584 // When d=0, the linear section collapses to a point. We leave c,d,f all zero in that case.
1585 if (inv.d > 0) {
1586 // Inverting the linear section is pretty straightfoward:
1587 // y = cx + f
1588 // y - f = cx
1589 // (1/c)y - f/c = x
1590 inv.c = 1.0f/src->c;
1591 inv.f = -src->f/src->c;
1592 }
1593
1594 // The interesting part is inverting the nonlinear section:
1595 // y = (ax + b)^g + e.
1596 // y - e = (ax + b)^g
1597 // (y - e)^1/g = ax + b
1598 // (y - e)^1/g - b = ax
1599 // (1/a)(y - e)^1/g - b/a = x
1600 //
1601 // To make that fit our form, we need to move the (1/a) term inside the exponentiation:
1602 // let k = (1/a)^g
1603 // (1/a)( y - e)^1/g - b/a = x
1604 // (ky - ke)^1/g - b/a = x
1605
1606 float k = powf_(src->a, -src->g); // (1/a)^g == a^-g
1607 inv.g = 1.0f / src->g;
1608 inv.a = k;
1609 inv.b = -k * src->e;
1610 inv.e = -src->b / src->a;
1611
1612 // We need to enforce the same constraints here that we do when fitting a curve,
1613 // a >= 0 and ad+b >= 0. These constraints are checked by classify(), so they're true
1614 // of the source function if we're here.
1615
1616 // Just like when fitting the curve, there's really no way to rescue a < 0.
1617 if (inv.a < 0) {
1618 return false;
1619 }
1620 // On the other hand we can rescue an ad+b that's gone slightly negative here.
1621 if (inv.a * inv.d + inv.b < 0) {
1622 inv.b = -inv.a * inv.d;
1623 }
1624
1625 // That should usually make classify(inv) == sRGBish true, but there are a couple situations
1626 // where we might still fail here, like non-finite parameter values.
1627 if (classify(inv) != sRGBish) {
1628 return false;
1629 }
1630
1631 assert (inv.a >= 0);
1632 assert (inv.a * inv.d + inv.b >= 0);
1633
1634 // Now in principle we're done.
1635 // But to preserve the valuable invariant inv(src(1.0f)) == 1.0f, we'll tweak
1636 // e or f of the inverse, depending on which segment contains src(1.0f).
1637 float s = skcms_TransferFunction_eval(src, 1.0f);
1638 if (!isfinitef_(s)) {
1639 return false;
1640 }
1641
1642 float sign = s < 0 ? -1.0f : 1.0f;
1643 s *= sign;
1644 if (s < inv.d) {
1645 inv.f = 1.0f - sign * inv.c * s;
1646 } else {
1647 inv.e = 1.0f - sign * powf_(inv.a * s + inv.b, inv.g);
1648 }
1649
1650 *dst = inv;
1651 return classify(*dst) == sRGBish;
1652 }
1653
1654 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ //
1655
1656 // From here below we're approximating an skcms_Curve with an skcms_TransferFunction{g,a,b,c,d,e,f}:
1657 //
1658 // tf(x) = cx + f x < d
1659 // tf(x) = (ax + b)^g + e x ≥ d
1660 //
1661 // When fitting, we add the additional constraint that both pieces meet at d:
1662 //
1663 // cd + f = (ad + b)^g + e
1664 //
1665 // Solving for e and folding it through gives an alternate formulation of the non-linear piece:
1666 //
1667 // tf(x) = cx + f x < d
1668 // tf(x) = (ax + b)^g - (ad + b)^g + cd + f x ≥ d
1669 //
1670 // Our overall strategy is then:
1671 // For a couple tolerances,
1672 // - fit_linear(): fit c,d,f iteratively to as many points as our tolerance allows
1673 // - invert c,d,f
1674 // - fit_nonlinear(): fit g,a,b using Gauss-Newton given those inverted c,d,f
1675 // (and by constraint, inverted e) to the inverse of the table.
1676 // Return the parameters with least maximum error.
1677 //
1678 // To run Gauss-Newton to find g,a,b, we'll also need the gradient of the residuals
1679 // of round-trip f_inv(x), the inverse of the non-linear piece of f(x).
1680 //
1681 // let y = Table(x)
1682 // r(x) = x - f_inv(y)
1683 //
1684 // ∂r/∂g = ln(ay + b)*(ay + b)^g
1685 // - ln(ad + b)*(ad + b)^g
1686 // ∂r/∂a = yg(ay + b)^(g-1)
1687 // - dg(ad + b)^(g-1)
1688 // ∂r/∂b = g(ay + b)^(g-1)
1689 // - g(ad + b)^(g-1)
1690
1691 // Return the residual of roundtripping skcms_Curve(x) through f_inv(y) with parameters P,
1692 // and fill out the gradient of the residual into dfdP.
rg_nonlinear(float x,const skcms_Curve * curve,const skcms_TransferFunction * tf,float dfdP[3])1693 static float rg_nonlinear(float x,
1694 const skcms_Curve* curve,
1695 const skcms_TransferFunction* tf,
1696 float dfdP[3]) {
1697 const float y = eval_curve(curve, x);
1698
1699 const float g = tf->g, a = tf->a, b = tf->b,
1700 c = tf->c, d = tf->d, f = tf->f;
1701
1702 const float Y = fmaxf_(a*y + b, 0.0f),
1703 D = a*d + b;
1704 assert (D >= 0);
1705
1706 // The gradient.
1707 dfdP[0] = logf_(Y)*powf_(Y, g)
1708 - logf_(D)*powf_(D, g);
1709 dfdP[1] = y*g*powf_(Y, g-1)
1710 - d*g*powf_(D, g-1);
1711 dfdP[2] = g*powf_(Y, g-1)
1712 - g*powf_(D, g-1);
1713
1714 // The residual.
1715 const float f_inv = powf_(Y, g)
1716 - powf_(D, g)
1717 + c*d + f;
1718 return x - f_inv;
1719 }
1720
gauss_newton_step(const skcms_Curve * curve,skcms_TransferFunction * tf,float x0,float dx,int N)1721 static bool gauss_newton_step(const skcms_Curve* curve,
1722 skcms_TransferFunction* tf,
1723 float x0, float dx, int N) {
1724 // We'll sample x from the range [x0,x1] (both inclusive) N times with even spacing.
1725 //
1726 // Let P = [ tf->g, tf->a, tf->b ] (the three terms that we're adjusting).
1727 //
1728 // We want to do P' = P + (Jf^T Jf)^-1 Jf^T r(P),
1729 // where r(P) is the residual vector
1730 // and Jf is the Jacobian matrix of f(), ∂r/∂P.
1731 //
1732 // Let's review the shape of each of these expressions:
1733 // r(P) is [N x 1], a column vector with one entry per value of x tested
1734 // Jf is [N x 3], a matrix with an entry for each (x,P) pair
1735 // Jf^T is [3 x N], the transpose of Jf
1736 //
1737 // Jf^T Jf is [3 x N] * [N x 3] == [3 x 3], a 3x3 matrix,
1738 // and so is its inverse (Jf^T Jf)^-1
1739 // Jf^T r(P) is [3 x N] * [N x 1] == [3 x 1], a column vector with the same shape as P
1740 //
1741 // Our implementation strategy to get to the final ∆P is
1742 // 1) evaluate Jf^T Jf, call that lhs
1743 // 2) evaluate Jf^T r(P), call that rhs
1744 // 3) invert lhs
1745 // 4) multiply inverse lhs by rhs
1746 //
1747 // This is a friendly implementation strategy because we don't have to have any
1748 // buffers that scale with N, and equally nice don't have to perform any matrix
1749 // operations that are variable size.
1750 //
1751 // Other implementation strategies could trade this off, e.g. evaluating the
1752 // pseudoinverse of Jf ( (Jf^T Jf)^-1 Jf^T ) directly, then multiplying that by
1753 // the residuals. That would probably require implementing singular value
1754 // decomposition, and would create a [3 x N] matrix to be multiplied by the
1755 // [N x 1] residual vector, but on the upside I think that'd eliminate the
1756 // possibility of this gauss_newton_step() function ever failing.
1757
1758 // 0) start off with lhs and rhs safely zeroed.
1759 skcms_Matrix3x3 lhs = {{ {0,0,0}, {0,0,0}, {0,0,0} }};
1760 skcms_Vector3 rhs = { {0,0,0} };
1761
1762 // 1,2) evaluate lhs and evaluate rhs
1763 // We want to evaluate Jf only once, but both lhs and rhs involve Jf^T,
1764 // so we'll have to update lhs and rhs at the same time.
1765 for (int i = 0; i < N; i++) {
1766 float x = x0 + i*dx;
1767
1768 float dfdP[3] = {0,0,0};
1769 float resid = rg_nonlinear(x,curve,tf, dfdP);
1770
1771 for (int r = 0; r < 3; r++) {
1772 for (int c = 0; c < 3; c++) {
1773 lhs.vals[r][c] += dfdP[r] * dfdP[c];
1774 }
1775 rhs.vals[r] += dfdP[r] * resid;
1776 }
1777 }
1778
1779 // If any of the 3 P parameters are unused, this matrix will be singular.
1780 // Detect those cases and fix them up to indentity instead, so we can invert.
1781 for (int k = 0; k < 3; k++) {
1782 if (lhs.vals[0][k]==0 && lhs.vals[1][k]==0 && lhs.vals[2][k]==0 &&
1783 lhs.vals[k][0]==0 && lhs.vals[k][1]==0 && lhs.vals[k][2]==0) {
1784 lhs.vals[k][k] = 1;
1785 }
1786 }
1787
1788 // 3) invert lhs
1789 skcms_Matrix3x3 lhs_inv;
1790 if (!skcms_Matrix3x3_invert(&lhs, &lhs_inv)) {
1791 return false;
1792 }
1793
1794 // 4) multiply inverse lhs by rhs
1795 skcms_Vector3 dP = mv_mul(&lhs_inv, &rhs);
1796 tf->g += dP.vals[0];
1797 tf->a += dP.vals[1];
1798 tf->b += dP.vals[2];
1799 return isfinitef_(tf->g) && isfinitef_(tf->a) && isfinitef_(tf->b);
1800 }
1801
max_roundtrip_error_checked(const skcms_Curve * curve,const skcms_TransferFunction * tf_inv)1802 static float max_roundtrip_error_checked(const skcms_Curve* curve,
1803 const skcms_TransferFunction* tf_inv) {
1804 skcms_TransferFunction tf;
1805 if (!skcms_TransferFunction_invert(tf_inv, &tf) || sRGBish != classify(tf)) {
1806 return INFINITY_;
1807 }
1808
1809 skcms_TransferFunction tf_inv_again;
1810 if (!skcms_TransferFunction_invert(&tf, &tf_inv_again)) {
1811 return INFINITY_;
1812 }
1813
1814 return skcms_MaxRoundtripError(curve, &tf_inv_again);
1815 }
1816
1817 // Fit the points in [L,N) to the non-linear piece of tf, or return false if we can't.
fit_nonlinear(const skcms_Curve * curve,int L,int N,skcms_TransferFunction * tf)1818 static bool fit_nonlinear(const skcms_Curve* curve, int L, int N, skcms_TransferFunction* tf) {
1819 // This enforces a few constraints that are not modeled in gauss_newton_step()'s optimization.
1820 auto fixup_tf = [tf]() {
1821 // a must be non-negative. That ensures the function is monotonically increasing.
1822 // We don't really know how to fix up a if it goes negative.
1823 if (tf->a < 0) {
1824 return false;
1825 }
1826 // ad+b must be non-negative. That ensures we don't end up with complex numbers in powf.
1827 // We feel just barely not uneasy enough to tweak b so ad+b is zero in this case.
1828 if (tf->a * tf->d + tf->b < 0) {
1829 tf->b = -tf->a * tf->d;
1830 }
1831 assert (tf->a >= 0 &&
1832 tf->a * tf->d + tf->b >= 0);
1833
1834 // cd+f must be ~= (ad+b)^g+e. That ensures the function is continuous. We keep e as a free
1835 // parameter so we can guarantee this.
1836 tf->e = tf->c*tf->d + tf->f
1837 - powf_(tf->a*tf->d + tf->b, tf->g);
1838
1839 return true;
1840 };
1841
1842 if (!fixup_tf()) {
1843 return false;
1844 }
1845
1846 // No matter where we start, dx should always represent N even steps from 0 to 1.
1847 const float dx = 1.0f / (N-1);
1848
1849 skcms_TransferFunction best_tf = *tf;
1850 float best_max_error = INFINITY_;
1851
1852 // Need this or several curves get worse... *sigh*
1853 float init_error = max_roundtrip_error_checked(curve, tf);
1854 if (init_error < best_max_error) {
1855 best_max_error = init_error;
1856 best_tf = *tf;
1857 }
1858
1859 // As far as we can tell, 1 Gauss-Newton step won't converge, and 3 steps is no better than 2.
1860 for (int j = 0; j < 8; j++) {
1861 if (!gauss_newton_step(curve, tf, L*dx, dx, N-L) || !fixup_tf()) {
1862 *tf = best_tf;
1863 return isfinitef_(best_max_error);
1864 }
1865
1866 float max_error = max_roundtrip_error_checked(curve, tf);
1867 if (max_error < best_max_error) {
1868 best_max_error = max_error;
1869 best_tf = *tf;
1870 }
1871 }
1872
1873 *tf = best_tf;
1874 return isfinitef_(best_max_error);
1875 }
1876
skcms_ApproximateCurve(const skcms_Curve * curve,skcms_TransferFunction * approx,float * max_error)1877 bool skcms_ApproximateCurve(const skcms_Curve* curve,
1878 skcms_TransferFunction* approx,
1879 float* max_error) {
1880 if (!curve || !approx || !max_error) {
1881 return false;
1882 }
1883
1884 if (curve->table_entries == 0) {
1885 // No point approximating an skcms_TransferFunction with an skcms_TransferFunction!
1886 return false;
1887 }
1888
1889 if (curve->table_entries == 1 || curve->table_entries > (uint32_t)INT_MAX) {
1890 // We need at least two points, and must put some reasonable cap on the maximum number.
1891 return false;
1892 }
1893
1894 int N = (int)curve->table_entries;
1895 const float dx = 1.0f / (N - 1);
1896
1897 *max_error = INFINITY_;
1898 const float kTolerances[] = { 1.5f / 65535.0f, 1.0f / 512.0f };
1899 for (int t = 0; t < ARRAY_COUNT(kTolerances); t++) {
1900 skcms_TransferFunction tf,
1901 tf_inv;
1902
1903 // It's problematic to fit curves with non-zero f, so always force it to zero explicitly.
1904 tf.f = 0.0f;
1905 int L = fit_linear(curve, N, kTolerances[t], &tf.c, &tf.d);
1906
1907 if (L == N) {
1908 // If the entire data set was linear, move the coefficients to the nonlinear portion
1909 // with G == 1. This lets use a canonical representation with d == 0.
1910 tf.g = 1;
1911 tf.a = tf.c;
1912 tf.b = tf.f;
1913 tf.c = tf.d = tf.e = tf.f = 0;
1914 } else if (L == N - 1) {
1915 // Degenerate case with only two points in the nonlinear segment. Solve directly.
1916 tf.g = 1;
1917 tf.a = (eval_curve(curve, (N-1)*dx) -
1918 eval_curve(curve, (N-2)*dx))
1919 / dx;
1920 tf.b = eval_curve(curve, (N-2)*dx)
1921 - tf.a * (N-2)*dx;
1922 tf.e = 0;
1923 } else {
1924 // Start by guessing a gamma-only curve through the midpoint.
1925 int mid = (L + N) / 2;
1926 float mid_x = mid / (N - 1.0f);
1927 float mid_y = eval_curve(curve, mid_x);
1928 tf.g = log2f_(mid_y) / log2f_(mid_x);
1929 tf.a = 1;
1930 tf.b = 0;
1931 tf.e = tf.c*tf.d + tf.f
1932 - powf_(tf.a*tf.d + tf.b, tf.g);
1933
1934
1935 if (!skcms_TransferFunction_invert(&tf, &tf_inv) ||
1936 !fit_nonlinear(curve, L,N, &tf_inv)) {
1937 continue;
1938 }
1939
1940 // We fit tf_inv, so calculate tf to keep in sync.
1941 // fit_nonlinear() should guarantee invertibility.
1942 if (!skcms_TransferFunction_invert(&tf_inv, &tf)) {
1943 assert(false);
1944 continue;
1945 }
1946 }
1947
1948 // We'd better have a sane, sRGB-ish TF by now.
1949 // Other non-Bad TFs would be fine, but we know we've only ever tried to fit sRGBish;
1950 // anything else is just some accident of math and the way we pun tf.g as a type flag.
1951 // fit_nonlinear() should guarantee this, but the special cases may fail this test.
1952 if (sRGBish != classify(tf)) {
1953 continue;
1954 }
1955
1956 // We find our error by roundtripping the table through tf_inv.
1957 //
1958 // (The most likely use case for this approximation is to be inverted and
1959 // used as the transfer function for a destination color space.)
1960 //
1961 // We've kept tf and tf_inv in sync above, but we can't guarantee that tf is
1962 // invertible, so re-verify that here (and use the new inverse for testing).
1963 // fit_nonlinear() should guarantee this, but the special cases that don't use
1964 // it may fail this test.
1965 if (!skcms_TransferFunction_invert(&tf, &tf_inv)) {
1966 continue;
1967 }
1968
1969 float err = skcms_MaxRoundtripError(curve, &tf_inv);
1970 if (*max_error > err) {
1971 *max_error = err;
1972 *approx = tf;
1973 }
1974 }
1975 return isfinitef_(*max_error);
1976 }
1977
1978 // ~~~~ Impl. of skcms_Transform() ~~~~
1979
1980 typedef enum {
1981 Op_load_a8,
1982 Op_load_g8,
1983 Op_load_8888_palette8,
1984 Op_load_4444,
1985 Op_load_565,
1986 Op_load_888,
1987 Op_load_8888,
1988 Op_load_1010102,
1989 Op_load_161616LE,
1990 Op_load_16161616LE,
1991 Op_load_161616BE,
1992 Op_load_16161616BE,
1993 Op_load_hhh,
1994 Op_load_hhhh,
1995 Op_load_fff,
1996 Op_load_ffff,
1997
1998 Op_swap_rb,
1999 Op_clamp,
2000 Op_invert,
2001 Op_force_opaque,
2002 Op_premul,
2003 Op_unpremul,
2004 Op_matrix_3x3,
2005 Op_matrix_3x4,
2006 Op_lab_to_xyz,
2007
2008 Op_tf_r,
2009 Op_tf_g,
2010 Op_tf_b,
2011 Op_tf_a,
2012
2013 Op_pq_r,
2014 Op_pq_g,
2015 Op_pq_b,
2016 Op_pq_a,
2017
2018 Op_hlg_r,
2019 Op_hlg_g,
2020 Op_hlg_b,
2021 Op_hlg_a,
2022
2023 Op_hlginv_r,
2024 Op_hlginv_g,
2025 Op_hlginv_b,
2026 Op_hlginv_a,
2027
2028 Op_table_r,
2029 Op_table_g,
2030 Op_table_b,
2031 Op_table_a,
2032
2033 Op_clut,
2034
2035 Op_store_a8,
2036 Op_store_g8,
2037 Op_store_4444,
2038 Op_store_565,
2039 Op_store_888,
2040 Op_store_8888,
2041 Op_store_1010102,
2042 Op_store_161616LE,
2043 Op_store_16161616LE,
2044 Op_store_161616BE,
2045 Op_store_16161616BE,
2046 Op_store_hhh,
2047 Op_store_hhhh,
2048 Op_store_fff,
2049 Op_store_ffff,
2050 } Op;
2051
2052 #if defined(__clang__)
2053 template <int N, typename T> using Vec = T __attribute__((ext_vector_type(N)));
2054 #elif defined(__GNUC__)
2055 // For some reason GCC accepts this nonsense, but not the more straightforward version,
2056 // template <int N, typename T> using Vec = T __attribute__((vector_size(N*sizeof(T))));
2057 template <int N, typename T>
2058 struct VecHelper { typedef T __attribute__((vector_size(N*sizeof(T)))) V; };
2059
2060 template <int N, typename T> using Vec = typename VecHelper<N,T>::V;
2061 #endif
2062
2063 // First, instantiate our default exec_ops() implementation using the default compiliation target.
2064
2065 namespace baseline {
2066 #if defined(SKCMS_PORTABLE) || !(defined(__clang__) || defined(__GNUC__)) \
2067 || (defined(__EMSCRIPTEN_major__) && !defined(__wasm_simd128__))
2068 #define N 1
2069 template <typename T> using V = T;
2070 using Color = float;
2071 #elif defined(__AVX512F__)
2072 #define N 16
2073 template <typename T> using V = Vec<N,T>;
2074 using Color = float;
2075 #elif defined(__AVX__)
2076 #define N 8
2077 template <typename T> using V = Vec<N,T>;
2078 using Color = float;
2079 #elif defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(SKCMS_OPT_INTO_NEON_FP16)
2080 #define N 8
2081 template <typename T> using V = Vec<N,T>;
2082 using Color = _Float16;
2083 #else
2084 #define N 4
2085 template <typename T> using V = Vec<N,T>;
2086 using Color = float;
2087 #endif
2088
2089 #include "src/Transform_inl.h"
2090 #undef N
2091 }
2092
2093 // Now, instantiate any other versions of run_program() we may want for runtime detection.
2094 #if !defined(SKCMS_PORTABLE) && \
2095 !defined(SKCMS_NO_RUNTIME_CPU_DETECTION) && \
2096 (( defined(__clang__) && __clang_major__ >= 5) || \
2097 (!defined(__clang__) && defined(__GNUC__))) \
2098 && defined(__x86_64__)
2099
2100 #if !defined(__AVX2__)
2101 #if defined(__clang__)
2102 #pragma clang attribute push(__attribute__((target("avx2,f16c"))), apply_to=function)
2103 #elif defined(__GNUC__)
2104 #pragma GCC push_options
2105 #pragma GCC target("avx2,f16c")
2106 #endif
2107
2108 namespace hsw {
2109 #define USING_AVX
2110 #define USING_AVX_F16C
2111 #define USING_AVX2
2112 #define N 8
2113 template <typename T> using V = Vec<N,T>;
2114 using Color = float;
2115
2116 #include "src/Transform_inl.h"
2117
2118 // src/Transform_inl.h will undefine USING_* for us.
2119 #undef N
2120 }
2121
2122 #if defined(__clang__)
2123 #pragma clang attribute pop
2124 #elif defined(__GNUC__)
2125 #pragma GCC pop_options
2126 #endif
2127
2128 #define TEST_FOR_HSW
2129 #endif
2130
2131 #if !defined(__AVX512F__)
2132 #if defined(__clang__)
2133 #pragma clang attribute push(__attribute__((target("avx512f,avx512dq,avx512cd,avx512bw,avx512vl"))), apply_to=function)
2134 #elif defined(__GNUC__)
2135 #pragma GCC push_options
2136 #pragma GCC target("avx512f,avx512dq,avx512cd,avx512bw,avx512vl")
2137 #endif
2138
2139 namespace skx {
2140 #define USING_AVX512F
2141 #define N 16
2142 template <typename T> using V = Vec<N,T>;
2143 using Color = float;
2144
2145 #include "src/Transform_inl.h"
2146
2147 // src/Transform_inl.h will undefine USING_* for us.
2148 #undef N
2149 }
2150
2151 #if defined(__clang__)
2152 #pragma clang attribute pop
2153 #elif defined(__GNUC__)
2154 #pragma GCC pop_options
2155 #endif
2156
2157 #define TEST_FOR_SKX
2158 #endif
2159
2160 #if defined(TEST_FOR_HSW) || defined(TEST_FOR_SKX)
2161 enum class CpuType { None, HSW, SKX };
cpu_type()2162 static CpuType cpu_type() {
2163 static const CpuType type = []{
2164 if (!runtime_cpu_detection) {
2165 return CpuType::None;
2166 }
2167 // See http://www.sandpile.org/x86/cpuid.htm
2168
2169 // First, a basic cpuid(1) lets us check prerequisites for HSW, SKX.
2170 uint32_t eax, ebx, ecx, edx;
2171 __asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
2172 : "0"(1), "2"(0));
2173 if ((edx & (1u<<25)) && // SSE
2174 (edx & (1u<<26)) && // SSE2
2175 (ecx & (1u<< 0)) && // SSE3
2176 (ecx & (1u<< 9)) && // SSSE3
2177 (ecx & (1u<<12)) && // FMA (N.B. not used, avoided even)
2178 (ecx & (1u<<19)) && // SSE4.1
2179 (ecx & (1u<<20)) && // SSE4.2
2180 (ecx & (1u<<26)) && // XSAVE
2181 (ecx & (1u<<27)) && // OSXSAVE
2182 (ecx & (1u<<28)) && // AVX
2183 (ecx & (1u<<29))) { // F16C
2184
2185 // Call cpuid(7) to check for AVX2 and AVX-512 bits.
2186 __asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
2187 : "0"(7), "2"(0));
2188 // eax from xgetbv(0) will tell us whether XMM, YMM, and ZMM state is saved.
2189 uint32_t xcr0, dont_need_edx;
2190 __asm__ __volatile__("xgetbv" : "=a"(xcr0), "=d"(dont_need_edx) : "c"(0));
2191
2192 if ((xcr0 & (1u<<1)) && // XMM register state saved?
2193 (xcr0 & (1u<<2)) && // YMM register state saved?
2194 (ebx & (1u<<5))) { // AVX2
2195 // At this point we're at least HSW. Continue checking for SKX.
2196 if ((xcr0 & (1u<< 5)) && // Opmasks state saved?
2197 (xcr0 & (1u<< 6)) && // First 16 ZMM registers saved?
2198 (xcr0 & (1u<< 7)) && // High 16 ZMM registers saved?
2199 (ebx & (1u<<16)) && // AVX512F
2200 (ebx & (1u<<17)) && // AVX512DQ
2201 (ebx & (1u<<28)) && // AVX512CD
2202 (ebx & (1u<<30)) && // AVX512BW
2203 (ebx & (1u<<31))) { // AVX512VL
2204 return CpuType::SKX;
2205 }
2206 return CpuType::HSW;
2207 }
2208 }
2209 return CpuType::None;
2210 }();
2211 return type;
2212 }
2213 #endif
2214
2215 #endif
2216
2217 typedef struct {
2218 Op op;
2219 const void* arg;
2220 } OpAndArg;
2221
select_curve_op(const skcms_Curve * curve,int channel)2222 static OpAndArg select_curve_op(const skcms_Curve* curve, int channel) {
2223 static const struct { Op sRGBish, PQish, HLGish, HLGinvish, table; } ops[] = {
2224 { Op_tf_r, Op_pq_r, Op_hlg_r, Op_hlginv_r, Op_table_r },
2225 { Op_tf_g, Op_pq_g, Op_hlg_g, Op_hlginv_g, Op_table_g },
2226 { Op_tf_b, Op_pq_b, Op_hlg_b, Op_hlginv_b, Op_table_b },
2227 { Op_tf_a, Op_pq_a, Op_hlg_a, Op_hlginv_a, Op_table_a },
2228 };
2229 const auto& op = ops[channel];
2230
2231 if (curve->table_entries == 0) {
2232 const OpAndArg noop = { Op_load_a8/*doesn't matter*/, nullptr };
2233
2234 const skcms_TransferFunction& tf = curve->parametric;
2235
2236 if (tf.g == 1 && tf.a == 1 &&
2237 tf.b == 0 && tf.c == 0 && tf.d == 0 && tf.e == 0 && tf.f == 0) {
2238 return noop;
2239 }
2240
2241 switch (classify(tf)) {
2242 case Bad: return noop;
2243 case sRGBish: return OpAndArg{op.sRGBish, &tf};
2244 case PQish: return OpAndArg{op.PQish, &tf};
2245 case HLGish: return OpAndArg{op.HLGish, &tf};
2246 case HLGinvish: return OpAndArg{op.HLGinvish, &tf};
2247 }
2248 }
2249 return OpAndArg{op.table, curve};
2250 }
2251
bytes_per_pixel(skcms_PixelFormat fmt)2252 static size_t bytes_per_pixel(skcms_PixelFormat fmt) {
2253 switch (fmt >> 1) { // ignore rgb/bgr
2254 case skcms_PixelFormat_A_8 >> 1: return 1;
2255 case skcms_PixelFormat_G_8 >> 1: return 1;
2256 case skcms_PixelFormat_RGBA_8888_Palette8 >> 1: return 1;
2257 case skcms_PixelFormat_ABGR_4444 >> 1: return 2;
2258 case skcms_PixelFormat_RGB_565 >> 1: return 2;
2259 case skcms_PixelFormat_RGB_888 >> 1: return 3;
2260 case skcms_PixelFormat_RGBA_8888 >> 1: return 4;
2261 case skcms_PixelFormat_RGBA_8888_sRGB >> 1: return 4;
2262 case skcms_PixelFormat_RGBA_1010102 >> 1: return 4;
2263 case skcms_PixelFormat_RGB_161616LE >> 1: return 6;
2264 case skcms_PixelFormat_RGBA_16161616LE >> 1: return 8;
2265 case skcms_PixelFormat_RGB_161616BE >> 1: return 6;
2266 case skcms_PixelFormat_RGBA_16161616BE >> 1: return 8;
2267 case skcms_PixelFormat_RGB_hhh_Norm >> 1: return 6;
2268 case skcms_PixelFormat_RGBA_hhhh_Norm >> 1: return 8;
2269 case skcms_PixelFormat_RGB_hhh >> 1: return 6;
2270 case skcms_PixelFormat_RGBA_hhhh >> 1: return 8;
2271 case skcms_PixelFormat_RGB_fff >> 1: return 12;
2272 case skcms_PixelFormat_RGBA_ffff >> 1: return 16;
2273 }
2274 assert(false);
2275 return 0;
2276 }
2277
prep_for_destination(const skcms_ICCProfile * profile,skcms_Matrix3x3 * fromXYZD50,skcms_TransferFunction * invR,skcms_TransferFunction * invG,skcms_TransferFunction * invB)2278 static bool prep_for_destination(const skcms_ICCProfile* profile,
2279 skcms_Matrix3x3* fromXYZD50,
2280 skcms_TransferFunction* invR,
2281 skcms_TransferFunction* invG,
2282 skcms_TransferFunction* invB) {
2283 // We only support destinations with parametric transfer functions
2284 // and with gamuts that can be transformed from XYZD50.
2285 return profile->has_trc
2286 && profile->has_toXYZD50
2287 && profile->trc[0].table_entries == 0
2288 && profile->trc[1].table_entries == 0
2289 && profile->trc[2].table_entries == 0
2290 && skcms_TransferFunction_invert(&profile->trc[0].parametric, invR)
2291 && skcms_TransferFunction_invert(&profile->trc[1].parametric, invG)
2292 && skcms_TransferFunction_invert(&profile->trc[2].parametric, invB)
2293 && skcms_Matrix3x3_invert(&profile->toXYZD50, fromXYZD50);
2294 }
2295
skcms_Transform(const void * src,skcms_PixelFormat srcFmt,skcms_AlphaFormat srcAlpha,const skcms_ICCProfile * srcProfile,void * dst,skcms_PixelFormat dstFmt,skcms_AlphaFormat dstAlpha,const skcms_ICCProfile * dstProfile,size_t npixels)2296 bool skcms_Transform(const void* src,
2297 skcms_PixelFormat srcFmt,
2298 skcms_AlphaFormat srcAlpha,
2299 const skcms_ICCProfile* srcProfile,
2300 void* dst,
2301 skcms_PixelFormat dstFmt,
2302 skcms_AlphaFormat dstAlpha,
2303 const skcms_ICCProfile* dstProfile,
2304 size_t npixels) {
2305 return skcms_TransformWithPalette(src, srcFmt, srcAlpha, srcProfile,
2306 dst, dstFmt, dstAlpha, dstProfile,
2307 npixels, nullptr);
2308 }
2309
skcms_TransformWithPalette(const void * src,skcms_PixelFormat srcFmt,skcms_AlphaFormat srcAlpha,const skcms_ICCProfile * srcProfile,void * dst,skcms_PixelFormat dstFmt,skcms_AlphaFormat dstAlpha,const skcms_ICCProfile * dstProfile,size_t nz,const void * palette)2310 bool skcms_TransformWithPalette(const void* src,
2311 skcms_PixelFormat srcFmt,
2312 skcms_AlphaFormat srcAlpha,
2313 const skcms_ICCProfile* srcProfile,
2314 void* dst,
2315 skcms_PixelFormat dstFmt,
2316 skcms_AlphaFormat dstAlpha,
2317 const skcms_ICCProfile* dstProfile,
2318 size_t nz,
2319 const void* palette) {
2320 const size_t dst_bpp = bytes_per_pixel(dstFmt),
2321 src_bpp = bytes_per_pixel(srcFmt);
2322 // Let's just refuse if the request is absurdly big.
2323 if (nz * dst_bpp > INT_MAX || nz * src_bpp > INT_MAX) {
2324 return false;
2325 }
2326 int n = (int)nz;
2327
2328 // Null profiles default to sRGB. Passing null for both is handy when doing format conversion.
2329 if (!srcProfile) {
2330 srcProfile = skcms_sRGB_profile();
2331 }
2332 if (!dstProfile) {
2333 dstProfile = skcms_sRGB_profile();
2334 }
2335
2336 // We can't transform in place unless the PixelFormats are the same size.
2337 if (dst == src && dst_bpp != src_bpp) {
2338 return false;
2339 }
2340 // TODO: more careful alias rejection (like, dst == src + 1)?
2341
2342 if (needs_palette(srcFmt) && !palette) {
2343 return false;
2344 }
2345
2346 Op program [32];
2347 const void* arguments[32];
2348
2349 Op* ops = program;
2350 const void** args = arguments;
2351
2352 // These are always parametric curves of some sort.
2353 skcms_Curve dst_curves[3];
2354 dst_curves[0].table_entries =
2355 dst_curves[1].table_entries =
2356 dst_curves[2].table_entries = 0;
2357
2358 skcms_Matrix3x3 from_xyz;
2359
2360 switch (srcFmt >> 1) {
2361 default: return false;
2362 case skcms_PixelFormat_A_8 >> 1: *ops++ = Op_load_a8; break;
2363 case skcms_PixelFormat_G_8 >> 1: *ops++ = Op_load_g8; break;
2364 case skcms_PixelFormat_ABGR_4444 >> 1: *ops++ = Op_load_4444; break;
2365 case skcms_PixelFormat_RGB_565 >> 1: *ops++ = Op_load_565; break;
2366 case skcms_PixelFormat_RGB_888 >> 1: *ops++ = Op_load_888; break;
2367 case skcms_PixelFormat_RGBA_8888 >> 1: *ops++ = Op_load_8888; break;
2368 case skcms_PixelFormat_RGBA_1010102 >> 1: *ops++ = Op_load_1010102; break;
2369 case skcms_PixelFormat_RGB_161616LE >> 1: *ops++ = Op_load_161616LE; break;
2370 case skcms_PixelFormat_RGBA_16161616LE >> 1: *ops++ = Op_load_16161616LE; break;
2371 case skcms_PixelFormat_RGB_161616BE >> 1: *ops++ = Op_load_161616BE; break;
2372 case skcms_PixelFormat_RGBA_16161616BE >> 1: *ops++ = Op_load_16161616BE; break;
2373 case skcms_PixelFormat_RGB_hhh_Norm >> 1: *ops++ = Op_load_hhh; break;
2374 case skcms_PixelFormat_RGBA_hhhh_Norm >> 1: *ops++ = Op_load_hhhh; break;
2375 case skcms_PixelFormat_RGB_hhh >> 1: *ops++ = Op_load_hhh; break;
2376 case skcms_PixelFormat_RGBA_hhhh >> 1: *ops++ = Op_load_hhhh; break;
2377 case skcms_PixelFormat_RGB_fff >> 1: *ops++ = Op_load_fff; break;
2378 case skcms_PixelFormat_RGBA_ffff >> 1: *ops++ = Op_load_ffff; break;
2379
2380 case skcms_PixelFormat_RGBA_8888_Palette8 >> 1: *ops++ = Op_load_8888_palette8;
2381 *args++ = palette;
2382 break;
2383 case skcms_PixelFormat_RGBA_8888_sRGB >> 1:
2384 *ops++ = Op_load_8888;
2385 *ops++ = Op_tf_r; *args++ = skcms_sRGB_TransferFunction();
2386 *ops++ = Op_tf_g; *args++ = skcms_sRGB_TransferFunction();
2387 *ops++ = Op_tf_b; *args++ = skcms_sRGB_TransferFunction();
2388 break;
2389 }
2390 if (srcFmt == skcms_PixelFormat_RGB_hhh_Norm ||
2391 srcFmt == skcms_PixelFormat_RGBA_hhhh_Norm) {
2392 *ops++ = Op_clamp;
2393 }
2394 if (srcFmt & 1) {
2395 *ops++ = Op_swap_rb;
2396 }
2397 skcms_ICCProfile gray_dst_profile;
2398 if ((dstFmt >> 1) == (skcms_PixelFormat_G_8 >> 1)) {
2399 // When transforming to gray, stop at XYZ (by setting toXYZ to identity), then transform
2400 // luminance (Y) by the destination transfer function.
2401 gray_dst_profile = *dstProfile;
2402 skcms_SetXYZD50(&gray_dst_profile, &skcms_XYZD50_profile()->toXYZD50);
2403 dstProfile = &gray_dst_profile;
2404 }
2405
2406 if (srcProfile->data_color_space == skcms_Signature_CMYK) {
2407 // Photoshop creates CMYK images as inverse CMYK.
2408 // These happen to be the only ones we've _ever_ seen.
2409 *ops++ = Op_invert;
2410 // With CMYK, ignore the alpha type, to avoid changing K or conflating CMY with K.
2411 srcAlpha = skcms_AlphaFormat_Unpremul;
2412 }
2413
2414 if (srcAlpha == skcms_AlphaFormat_Opaque) {
2415 *ops++ = Op_force_opaque;
2416 } else if (srcAlpha == skcms_AlphaFormat_PremulAsEncoded) {
2417 *ops++ = Op_unpremul;
2418 }
2419
2420 if (dstProfile != srcProfile) {
2421
2422 if (!prep_for_destination(dstProfile,
2423 &from_xyz,
2424 &dst_curves[0].parametric,
2425 &dst_curves[1].parametric,
2426 &dst_curves[2].parametric)) {
2427 return false;
2428 }
2429
2430 if (srcProfile->has_A2B) {
2431 if (srcProfile->A2B.input_channels) {
2432 for (int i = 0; i < (int)srcProfile->A2B.input_channels; i++) {
2433 OpAndArg oa = select_curve_op(&srcProfile->A2B.input_curves[i], i);
2434 if (oa.arg) {
2435 *ops++ = oa.op;
2436 *args++ = oa.arg;
2437 }
2438 }
2439 *ops++ = Op_clamp;
2440 *ops++ = Op_clut;
2441 *args++ = &srcProfile->A2B;
2442 }
2443
2444 if (srcProfile->A2B.matrix_channels == 3) {
2445 for (int i = 0; i < 3; i++) {
2446 OpAndArg oa = select_curve_op(&srcProfile->A2B.matrix_curves[i], i);
2447 if (oa.arg) {
2448 *ops++ = oa.op;
2449 *args++ = oa.arg;
2450 }
2451 }
2452
2453 static const skcms_Matrix3x4 I = {{
2454 {1,0,0,0},
2455 {0,1,0,0},
2456 {0,0,1,0},
2457 }};
2458 if (0 != memcmp(&I, &srcProfile->A2B.matrix, sizeof(I))) {
2459 *ops++ = Op_matrix_3x4;
2460 *args++ = &srcProfile->A2B.matrix;
2461 }
2462 }
2463
2464 if (srcProfile->A2B.output_channels == 3) {
2465 for (int i = 0; i < 3; i++) {
2466 OpAndArg oa = select_curve_op(&srcProfile->A2B.output_curves[i], i);
2467 if (oa.arg) {
2468 *ops++ = oa.op;
2469 *args++ = oa.arg;
2470 }
2471 }
2472 }
2473
2474 if (srcProfile->pcs == skcms_Signature_Lab) {
2475 *ops++ = Op_lab_to_xyz;
2476 }
2477
2478 } else if (srcProfile->has_trc && srcProfile->has_toXYZD50) {
2479 for (int i = 0; i < 3; i++) {
2480 OpAndArg oa = select_curve_op(&srcProfile->trc[i], i);
2481 if (oa.arg) {
2482 *ops++ = oa.op;
2483 *args++ = oa.arg;
2484 }
2485 }
2486 } else {
2487 return false;
2488 }
2489
2490 // A2B sources should already be in XYZD50 at this point.
2491 // Others still need to be transformed using their toXYZD50 matrix.
2492 // N.B. There are profiles that contain both A2B tags and toXYZD50 matrices.
2493 // If we use the A2B tags, we need to ignore the XYZD50 matrix entirely.
2494 assert (srcProfile->has_A2B || srcProfile->has_toXYZD50);
2495 static const skcms_Matrix3x3 I = {{
2496 { 1.0f, 0.0f, 0.0f },
2497 { 0.0f, 1.0f, 0.0f },
2498 { 0.0f, 0.0f, 1.0f },
2499 }};
2500 const skcms_Matrix3x3* to_xyz = srcProfile->has_A2B ? &I : &srcProfile->toXYZD50;
2501
2502 // There's a chance the source and destination gamuts are identical,
2503 // in which case we can skip the gamut transform.
2504 if (0 != memcmp(&dstProfile->toXYZD50, to_xyz, sizeof(skcms_Matrix3x3))) {
2505 // Concat the entire gamut transform into from_xyz,
2506 // now slightly misnamed but it's a handy spot to stash the result.
2507 from_xyz = skcms_Matrix3x3_concat(&from_xyz, to_xyz);
2508 *ops++ = Op_matrix_3x3;
2509 *args++ = &from_xyz;
2510 }
2511
2512 // Encode back to dst RGB using its parametric transfer functions.
2513 for (int i = 0; i < 3; i++) {
2514 OpAndArg oa = select_curve_op(dst_curves+i, i);
2515 if (oa.arg) {
2516 assert (oa.op != Op_table_r &&
2517 oa.op != Op_table_g &&
2518 oa.op != Op_table_b &&
2519 oa.op != Op_table_a);
2520 *ops++ = oa.op;
2521 *args++ = oa.arg;
2522 }
2523 }
2524 }
2525
2526 // Clamp here before premul to make sure we're clamping to normalized values _and_ gamut,
2527 // not just to values that fit in [0,1].
2528 //
2529 // E.g. r = 1.1, a = 0.5 would fit fine in fixed point after premul (ra=0.55,a=0.5),
2530 // but would be carrying r > 1, which is really unexpected for downstream consumers.
2531 if (dstFmt < skcms_PixelFormat_RGB_hhh) {
2532 *ops++ = Op_clamp;
2533 }
2534 if (dstAlpha == skcms_AlphaFormat_Opaque) {
2535 *ops++ = Op_force_opaque;
2536 } else if (dstAlpha == skcms_AlphaFormat_PremulAsEncoded) {
2537 *ops++ = Op_premul;
2538 }
2539 if (dstFmt & 1) {
2540 *ops++ = Op_swap_rb;
2541 }
2542 switch (dstFmt >> 1) {
2543 default: return false;
2544 case skcms_PixelFormat_A_8 >> 1: *ops++ = Op_store_a8; break;
2545 case skcms_PixelFormat_G_8 >> 1: *ops++ = Op_store_g8; break;
2546 case skcms_PixelFormat_ABGR_4444 >> 1: *ops++ = Op_store_4444; break;
2547 case skcms_PixelFormat_RGB_565 >> 1: *ops++ = Op_store_565; break;
2548 case skcms_PixelFormat_RGB_888 >> 1: *ops++ = Op_store_888; break;
2549 case skcms_PixelFormat_RGBA_8888 >> 1: *ops++ = Op_store_8888; break;
2550 case skcms_PixelFormat_RGBA_1010102 >> 1: *ops++ = Op_store_1010102; break;
2551 case skcms_PixelFormat_RGB_161616LE >> 1: *ops++ = Op_store_161616LE; break;
2552 case skcms_PixelFormat_RGBA_16161616LE >> 1: *ops++ = Op_store_16161616LE; break;
2553 case skcms_PixelFormat_RGB_161616BE >> 1: *ops++ = Op_store_161616BE; break;
2554 case skcms_PixelFormat_RGBA_16161616BE >> 1: *ops++ = Op_store_16161616BE; break;
2555 case skcms_PixelFormat_RGB_hhh_Norm >> 1: *ops++ = Op_store_hhh; break;
2556 case skcms_PixelFormat_RGBA_hhhh_Norm >> 1: *ops++ = Op_store_hhhh; break;
2557 case skcms_PixelFormat_RGB_hhh >> 1: *ops++ = Op_store_hhh; break;
2558 case skcms_PixelFormat_RGBA_hhhh >> 1: *ops++ = Op_store_hhhh; break;
2559 case skcms_PixelFormat_RGB_fff >> 1: *ops++ = Op_store_fff; break;
2560 case skcms_PixelFormat_RGBA_ffff >> 1: *ops++ = Op_store_ffff; break;
2561
2562 case skcms_PixelFormat_RGBA_8888_sRGB >> 1:
2563 *ops++ = Op_tf_r; *args++ = skcms_sRGB_Inverse_TransferFunction();
2564 *ops++ = Op_tf_g; *args++ = skcms_sRGB_Inverse_TransferFunction();
2565 *ops++ = Op_tf_b; *args++ = skcms_sRGB_Inverse_TransferFunction();
2566 *ops++ = Op_store_8888;
2567 break;
2568 }
2569
2570 auto run = baseline::run_program;
2571 #if defined(TEST_FOR_HSW)
2572 switch (cpu_type()) {
2573 case CpuType::None: break;
2574 case CpuType::HSW: run = hsw::run_program; break;
2575 case CpuType::SKX: run = hsw::run_program; break;
2576 }
2577 #endif
2578 #if defined(TEST_FOR_SKX)
2579 switch (cpu_type()) {
2580 case CpuType::None: break;
2581 case CpuType::HSW: break;
2582 case CpuType::SKX: run = skx::run_program; break;
2583 }
2584 #endif
2585 run(program, arguments, (const char*)src, (char*)dst, n, src_bpp,dst_bpp);
2586 return true;
2587 }
2588
assert_usable_as_destination(const skcms_ICCProfile * profile)2589 static void assert_usable_as_destination(const skcms_ICCProfile* profile) {
2590 #if defined(NDEBUG)
2591 (void)profile;
2592 #else
2593 skcms_Matrix3x3 fromXYZD50;
2594 skcms_TransferFunction invR, invG, invB;
2595 assert(prep_for_destination(profile, &fromXYZD50, &invR, &invG, &invB));
2596 #endif
2597 }
2598
skcms_MakeUsableAsDestination(skcms_ICCProfile * profile)2599 bool skcms_MakeUsableAsDestination(skcms_ICCProfile* profile) {
2600 skcms_Matrix3x3 fromXYZD50;
2601 if (!profile->has_trc || !profile->has_toXYZD50
2602 || !skcms_Matrix3x3_invert(&profile->toXYZD50, &fromXYZD50)) {
2603 return false;
2604 }
2605
2606 skcms_TransferFunction tf[3];
2607 for (int i = 0; i < 3; i++) {
2608 skcms_TransferFunction inv;
2609 if (profile->trc[i].table_entries == 0
2610 && skcms_TransferFunction_invert(&profile->trc[i].parametric, &inv)) {
2611 tf[i] = profile->trc[i].parametric;
2612 continue;
2613 }
2614
2615 float max_error;
2616 // Parametric curves from skcms_ApproximateCurve() are guaranteed to be invertible.
2617 if (!skcms_ApproximateCurve(&profile->trc[i], &tf[i], &max_error)) {
2618 return false;
2619 }
2620 }
2621
2622 for (int i = 0; i < 3; ++i) {
2623 profile->trc[i].table_entries = 0;
2624 profile->trc[i].parametric = tf[i];
2625 }
2626
2627 assert_usable_as_destination(profile);
2628 return true;
2629 }
2630
skcms_MakeUsableAsDestinationWithSingleCurve(skcms_ICCProfile * profile)2631 bool skcms_MakeUsableAsDestinationWithSingleCurve(skcms_ICCProfile* profile) {
2632 // Operate on a copy of profile, so we can choose the best TF for the original curves
2633 skcms_ICCProfile result = *profile;
2634 if (!skcms_MakeUsableAsDestination(&result)) {
2635 return false;
2636 }
2637
2638 int best_tf = 0;
2639 float min_max_error = INFINITY_;
2640 for (int i = 0; i < 3; i++) {
2641 skcms_TransferFunction inv;
2642 if (!skcms_TransferFunction_invert(&result.trc[i].parametric, &inv)) {
2643 return false;
2644 }
2645
2646 float err = 0;
2647 for (int j = 0; j < 3; ++j) {
2648 err = fmaxf_(err, skcms_MaxRoundtripError(&profile->trc[j], &inv));
2649 }
2650 if (min_max_error > err) {
2651 min_max_error = err;
2652 best_tf = i;
2653 }
2654 }
2655
2656 for (int i = 0; i < 3; i++) {
2657 result.trc[i].parametric = result.trc[best_tf].parametric;
2658 }
2659
2660 *profile = result;
2661 assert_usable_as_destination(profile);
2662 return true;
2663 }
2664