1 /*
2  * Copyright 2018 Google Inc.
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #include "skcms.h"
9 #include "skcms_internal.h"
10 #include <assert.h>
11 #include <float.h>
12 #include <limits.h>
13 #include <stdlib.h>
14 #include <string.h>
15 
16 #if defined(__ARM_NEON)
17     #include <arm_neon.h>
18 #elif defined(__SSE__)
19     #include <immintrin.h>
20 
21     #if defined(__clang__)
22         // That #include <immintrin.h> is usually enough, but Clang's headers
23         // "helpfully" skip including the whole kitchen sink when _MSC_VER is
24         // defined, because lots of programs on Windows would include that and
25         // it'd be a lot slower.  But we want all those headers included so we
26         // can use their features after runtime checks later.
27         #include <smmintrin.h>
28         #include <avxintrin.h>
29         #include <avx2intrin.h>
30         #include <avx512fintrin.h>
31         #include <avx512dqintrin.h>
32     #endif
33 #endif
34 
35 static bool runtime_cpu_detection = true;
skcms_DisableRuntimeCPUDetection()36 void skcms_DisableRuntimeCPUDetection() {
37     runtime_cpu_detection = false;
38 }
39 
40 // sizeof(x) will return size_t, which is 32-bit on some machines and 64-bit on others.
41 // We have better testing on 64-bit machines, so force 32-bit machines to behave like 64-bit.
42 //
43 // Please do not use sizeof() directly, and size_t only when required.
44 // (We have no way of enforcing these requests...)
45 #define SAFE_SIZEOF(x) ((uint64_t)sizeof(x))
46 
47 // Same sort of thing for _Layout structs with a variable sized array at the end (named "variable").
48 #define SAFE_FIXED_SIZE(type) ((uint64_t)offsetof(type, variable))
49 
50 static const union {
51     uint32_t bits;
52     float    f;
53 } inf_ = { 0x7f800000 };
54 #define INFINITY_ inf_.f
55 
56 #if defined(__clang__) || defined(__GNUC__)
57     #define small_memcpy __builtin_memcpy
58 #else
59     #define small_memcpy memcpy
60 #endif
61 
log2f_(float x)62 static float log2f_(float x) {
63     // The first approximation of log2(x) is its exponent 'e', minus 127.
64     int32_t bits;
65     small_memcpy(&bits, &x, sizeof(bits));
66 
67     float e = (float)bits * (1.0f / (1<<23));
68 
69     // If we use the mantissa too we can refine the error signficantly.
70     int32_t m_bits = (bits & 0x007fffff) | 0x3f000000;
71     float m;
72     small_memcpy(&m, &m_bits, sizeof(m));
73 
74     return (e - 124.225514990f
75               -   1.498030302f*m
76               -   1.725879990f/(0.3520887068f + m));
77 }
logf_(float x)78 static float logf_(float x) {
79     const float ln2 = 0.69314718f;
80     return ln2*log2f_(x);
81 }
82 
exp2f_(float x)83 static float exp2f_(float x) {
84     float fract = x - floorf_(x);
85 
86     float fbits = (1.0f * (1<<23)) * (x + 121.274057500f
87                                         -   1.490129070f*fract
88                                         +  27.728023300f/(4.84252568f - fract));
89 
90     // Before we cast fbits to int32_t, check for out of range values to pacify UBSAN.
91     // INT_MAX is not exactly representable as a float, so exclude it as effectively infinite.
92     // Negative values are effectively underflow - we'll end up returning a (different) negative
93     // value, which makes no sense. So clamp to zero.
94     if (fbits >= (float)INT_MAX) {
95         return INFINITY_;
96     } else if (fbits < 0) {
97         return 0;
98     }
99 
100     int32_t bits = (int32_t)fbits;
101     small_memcpy(&x, &bits, sizeof(x));
102     return x;
103 }
104 
105 // Not static, as it's used by some test tools.
powf_(float x,float y)106 float powf_(float x, float y) {
107     assert (x >= 0);
108     return (x == 0) || (x == 1) ? x
109                                 : exp2f_(log2f_(x) * y);
110 }
111 
expf_(float x)112 static float expf_(float x) {
113     const float log2_e = 1.4426950408889634074f;
114     return exp2f_(log2_e * x);
115 }
116 
fmaxf_(float x,float y)117 static float fmaxf_(float x, float y) { return x > y ? x : y; }
fminf_(float x,float y)118 static float fminf_(float x, float y) { return x < y ? x : y; }
119 
isfinitef_(float x)120 static bool isfinitef_(float x) { return 0 == x*0; }
121 
minus_1_ulp(float x)122 static float minus_1_ulp(float x) {
123     int32_t bits;
124     memcpy(&bits, &x, sizeof(bits));
125     bits = bits - 1;
126     memcpy(&x, &bits, sizeof(bits));
127     return x;
128 }
129 
130 // Most transfer functions we work with are sRGBish.
131 // For exotic HDR transfer functions, we encode them using a tf.g that makes no sense,
132 // and repurpose the other fields to hold the parameters of the HDR functions.
133 enum TFKind { Bad, sRGBish, PQish, HLGish, HLGinvish };
134 struct TF_PQish  { float A,B,C,D,E,F; };
135 struct TF_HLGish { float R,G,a,b,c; };
136 
TFKind_marker(TFKind kind)137 static float TFKind_marker(TFKind kind) {
138     // We'd use different NaNs, but those aren't guaranteed to be preserved by WASM.
139     return -(float)kind;
140 }
141 
classify(const skcms_TransferFunction & tf,TF_PQish * pq=nullptr,TF_HLGish * hlg=nullptr)142 static TFKind classify(const skcms_TransferFunction& tf, TF_PQish*   pq = nullptr
143                                                        , TF_HLGish* hlg = nullptr) {
144     if (tf.g < 0 && (int)tf.g == tf.g) {
145         // TODO: sanity checks for PQ/HLG like we do for sRGBish.
146         switch ((int)tf.g) {
147             case -PQish:     if (pq ) { memcpy(pq , &tf.a, sizeof(*pq )); } return PQish;
148             case -HLGish:    if (hlg) { memcpy(hlg, &tf.a, sizeof(*hlg)); } return HLGish;
149             case -HLGinvish: if (hlg) { memcpy(hlg, &tf.a, sizeof(*hlg)); } return HLGinvish;
150         }
151         return Bad;
152     }
153 
154     // Basic sanity checks for sRGBish transfer functions.
155     if (isfinitef_(tf.a + tf.b + tf.c + tf.d + tf.e + tf.f + tf.g)
156             // a,c,d,g should be non-negative to make any sense.
157             && tf.a >= 0
158             && tf.c >= 0
159             && tf.d >= 0
160             && tf.g >= 0
161             // Raising a negative value to a fractional tf->g produces complex numbers.
162             && tf.a * tf.d + tf.b >= 0) {
163         return sRGBish;
164     }
165 
166     return Bad;
167 }
168 
skcms_TransferFunction_isSRGBish(const skcms_TransferFunction * tf)169 bool skcms_TransferFunction_isSRGBish(const skcms_TransferFunction* tf) {
170     return classify(*tf) == sRGBish;
171 }
skcms_TransferFunction_isPQish(const skcms_TransferFunction * tf)172 bool skcms_TransferFunction_isPQish(const skcms_TransferFunction* tf) {
173     return classify(*tf) == PQish;
174 }
skcms_TransferFunction_isHLGish(const skcms_TransferFunction * tf)175 bool skcms_TransferFunction_isHLGish(const skcms_TransferFunction* tf) {
176     return classify(*tf) == HLGish;
177 }
178 
skcms_TransferFunction_makePQish(skcms_TransferFunction * tf,float A,float B,float C,float D,float E,float F)179 bool skcms_TransferFunction_makePQish(skcms_TransferFunction* tf,
180                                       float A, float B, float C,
181                                       float D, float E, float F) {
182     *tf = { TFKind_marker(PQish), A,B,C,D,E,F };
183     assert(skcms_TransferFunction_isPQish(tf));
184     return true;
185 }
186 
skcms_TransferFunction_makeHLGish(skcms_TransferFunction * tf,float R,float G,float a,float b,float c)187 bool skcms_TransferFunction_makeHLGish(skcms_TransferFunction* tf,
188                                        float R, float G,
189                                        float a, float b, float c) {
190     *tf = { TFKind_marker(HLGish), R,G, a,b,c, 0 };
191     assert(skcms_TransferFunction_isHLGish(tf));
192     return true;
193 }
194 
skcms_TransferFunction_eval(const skcms_TransferFunction * tf,float x)195 float skcms_TransferFunction_eval(const skcms_TransferFunction* tf, float x) {
196     float sign = x < 0 ? -1.0f : 1.0f;
197     x *= sign;
198 
199     TF_PQish  pq;
200     TF_HLGish hlg;
201     switch (classify(*tf, &pq, &hlg)) {
202         case Bad:       break;
203 
204         case HLGish:    return sign * (x*hlg.R <= 1 ? powf_(x*hlg.R, hlg.G)
205                                                     : expf_((x-hlg.c)*hlg.a) + hlg.b);
206 
207         // skcms_TransferFunction_invert() inverts R, G, and a for HLGinvish so this math is fast.
208         case HLGinvish: return sign * (x <= 1 ? hlg.R * powf_(x, hlg.G)
209                                               : hlg.a * logf_(x - hlg.b) + hlg.c);
210 
211 
212         case sRGBish: return sign * (x < tf->d ?       tf->c * x + tf->f
213                                                : powf_(tf->a * x + tf->b, tf->g) + tf->e);
214 
215         case PQish: return sign * powf_(fmaxf_(pq.A + pq.B * powf_(x, pq.C), 0)
216                                             / (pq.D + pq.E * powf_(x, pq.C)),
217                                         pq.F);
218     }
219     return 0;
220 }
221 
222 
eval_curve(const skcms_Curve * curve,float x)223 static float eval_curve(const skcms_Curve* curve, float x) {
224     if (curve->table_entries == 0) {
225         return skcms_TransferFunction_eval(&curve->parametric, x);
226     }
227 
228     float ix = fmaxf_(0, fminf_(x, 1)) * (curve->table_entries - 1);
229     int   lo = (int)                   ix        ,
230           hi = (int)(float)minus_1_ulp(ix + 1.0f);
231     float t = ix - (float)lo;
232 
233     float l, h;
234     if (curve->table_8) {
235         l = curve->table_8[lo] * (1/255.0f);
236         h = curve->table_8[hi] * (1/255.0f);
237     } else {
238         uint16_t be_l, be_h;
239         memcpy(&be_l, curve->table_16 + 2*lo, 2);
240         memcpy(&be_h, curve->table_16 + 2*hi, 2);
241         uint16_t le_l = ((be_l << 8) | (be_l >> 8)) & 0xffff;
242         uint16_t le_h = ((be_h << 8) | (be_h >> 8)) & 0xffff;
243         l = le_l * (1/65535.0f);
244         h = le_h * (1/65535.0f);
245     }
246     return l + (h-l)*t;
247 }
248 
skcms_MaxRoundtripError(const skcms_Curve * curve,const skcms_TransferFunction * inv_tf)249 float skcms_MaxRoundtripError(const skcms_Curve* curve, const skcms_TransferFunction* inv_tf) {
250     uint32_t N = curve->table_entries > 256 ? curve->table_entries : 256;
251     const float dx = 1.0f / (N - 1);
252     float err = 0;
253     for (uint32_t i = 0; i < N; i++) {
254         float x = i * dx,
255               y = eval_curve(curve, x);
256         err = fmaxf_(err, fabsf_(x - skcms_TransferFunction_eval(inv_tf, y)));
257     }
258     return err;
259 }
260 
skcms_AreApproximateInverses(const skcms_Curve * curve,const skcms_TransferFunction * inv_tf)261 bool skcms_AreApproximateInverses(const skcms_Curve* curve, const skcms_TransferFunction* inv_tf) {
262     return skcms_MaxRoundtripError(curve, inv_tf) < (1/512.0f);
263 }
264 
265 // Additional ICC signature values that are only used internally
266 enum {
267     // File signature
268     skcms_Signature_acsp = 0x61637370,
269 
270     // Tag signatures
271     skcms_Signature_rTRC = 0x72545243,
272     skcms_Signature_gTRC = 0x67545243,
273     skcms_Signature_bTRC = 0x62545243,
274     skcms_Signature_kTRC = 0x6B545243,
275 
276     skcms_Signature_rXYZ = 0x7258595A,
277     skcms_Signature_gXYZ = 0x6758595A,
278     skcms_Signature_bXYZ = 0x6258595A,
279 
280     skcms_Signature_A2B0 = 0x41324230,
281     skcms_Signature_A2B1 = 0x41324231,
282     skcms_Signature_mAB  = 0x6D414220,
283 
284     skcms_Signature_CHAD = 0x63686164,
285     skcms_Signature_WTPT = 0x77747074,
286 
287     // Type signatures
288     skcms_Signature_curv = 0x63757276,
289     skcms_Signature_mft1 = 0x6D667431,
290     skcms_Signature_mft2 = 0x6D667432,
291     skcms_Signature_para = 0x70617261,
292     skcms_Signature_sf32 = 0x73663332,
293     // XYZ is also a PCS signature, so it's defined in skcms.h
294     // skcms_Signature_XYZ = 0x58595A20,
295 };
296 
read_big_u16(const uint8_t * ptr)297 static uint16_t read_big_u16(const uint8_t* ptr) {
298     uint16_t be;
299     memcpy(&be, ptr, sizeof(be));
300 #if defined(_MSC_VER)
301     return _byteswap_ushort(be);
302 #else
303     return __builtin_bswap16(be);
304 #endif
305 }
306 
read_big_u32(const uint8_t * ptr)307 static uint32_t read_big_u32(const uint8_t* ptr) {
308     uint32_t be;
309     memcpy(&be, ptr, sizeof(be));
310 #if defined(_MSC_VER)
311     return _byteswap_ulong(be);
312 #else
313     return __builtin_bswap32(be);
314 #endif
315 }
316 
read_big_i32(const uint8_t * ptr)317 static int32_t read_big_i32(const uint8_t* ptr) {
318     return (int32_t)read_big_u32(ptr);
319 }
320 
read_big_fixed(const uint8_t * ptr)321 static float read_big_fixed(const uint8_t* ptr) {
322     return read_big_i32(ptr) * (1.0f / 65536.0f);
323 }
324 
325 // Maps to an in-memory profile so that fields line up to the locations specified
326 // in ICC.1:2010, section 7.2
327 typedef struct {
328     uint8_t size                [ 4];
329     uint8_t cmm_type            [ 4];
330     uint8_t version             [ 4];
331     uint8_t profile_class       [ 4];
332     uint8_t data_color_space    [ 4];
333     uint8_t pcs                 [ 4];
334     uint8_t creation_date_time  [12];
335     uint8_t signature           [ 4];
336     uint8_t platform            [ 4];
337     uint8_t flags               [ 4];
338     uint8_t device_manufacturer [ 4];
339     uint8_t device_model        [ 4];
340     uint8_t device_attributes   [ 8];
341     uint8_t rendering_intent    [ 4];
342     uint8_t illuminant_X        [ 4];
343     uint8_t illuminant_Y        [ 4];
344     uint8_t illuminant_Z        [ 4];
345     uint8_t creator             [ 4];
346     uint8_t profile_id          [16];
347     uint8_t reserved            [28];
348     uint8_t tag_count           [ 4]; // Technically not part of header, but required
349 } header_Layout;
350 
351 typedef struct {
352     uint8_t signature [4];
353     uint8_t offset    [4];
354     uint8_t size      [4];
355 } tag_Layout;
356 
get_tag_table(const skcms_ICCProfile * profile)357 static const tag_Layout* get_tag_table(const skcms_ICCProfile* profile) {
358     return (const tag_Layout*)(profile->buffer + SAFE_SIZEOF(header_Layout));
359 }
360 
361 // s15Fixed16ArrayType is technically variable sized, holding N values. However, the only valid
362 // use of the type is for the CHAD tag that stores exactly nine values.
363 typedef struct {
364     uint8_t type     [ 4];
365     uint8_t reserved [ 4];
366     uint8_t values   [36];
367 } sf32_Layout;
368 
skcms_GetCHAD(const skcms_ICCProfile * profile,skcms_Matrix3x3 * m)369 bool skcms_GetCHAD(const skcms_ICCProfile* profile, skcms_Matrix3x3* m) {
370     skcms_ICCTag tag;
371     if (!skcms_GetTagBySignature(profile, skcms_Signature_CHAD, &tag)) {
372         return false;
373     }
374 
375     if (tag.type != skcms_Signature_sf32 || tag.size < SAFE_SIZEOF(sf32_Layout)) {
376         return false;
377     }
378 
379     const sf32_Layout* sf32Tag = (const sf32_Layout*)tag.buf;
380     const uint8_t* values = sf32Tag->values;
381     for (int r = 0; r < 3; ++r)
382     for (int c = 0; c < 3; ++c, values += 4) {
383         m->vals[r][c] = read_big_fixed(values);
384     }
385     return true;
386 }
387 
388 // XYZType is technically variable sized, holding N XYZ triples. However, the only valid uses of
389 // the type are for tags/data that store exactly one triple.
390 typedef struct {
391     uint8_t type     [4];
392     uint8_t reserved [4];
393     uint8_t X        [4];
394     uint8_t Y        [4];
395     uint8_t Z        [4];
396 } XYZ_Layout;
397 
read_tag_xyz(const skcms_ICCTag * tag,float * x,float * y,float * z)398 static bool read_tag_xyz(const skcms_ICCTag* tag, float* x, float* y, float* z) {
399     if (tag->type != skcms_Signature_XYZ || tag->size < SAFE_SIZEOF(XYZ_Layout)) {
400         return false;
401     }
402 
403     const XYZ_Layout* xyzTag = (const XYZ_Layout*)tag->buf;
404 
405     *x = read_big_fixed(xyzTag->X);
406     *y = read_big_fixed(xyzTag->Y);
407     *z = read_big_fixed(xyzTag->Z);
408     return true;
409 }
410 
skcms_GetWTPT(const skcms_ICCProfile * profile,float xyz[3])411 bool skcms_GetWTPT(const skcms_ICCProfile* profile, float xyz[3]) {
412     skcms_ICCTag tag;
413     return skcms_GetTagBySignature(profile, skcms_Signature_WTPT, &tag) &&
414            read_tag_xyz(&tag, &xyz[0], &xyz[1], &xyz[2]);
415 }
416 
read_to_XYZD50(const skcms_ICCTag * rXYZ,const skcms_ICCTag * gXYZ,const skcms_ICCTag * bXYZ,skcms_Matrix3x3 * toXYZ)417 static bool read_to_XYZD50(const skcms_ICCTag* rXYZ, const skcms_ICCTag* gXYZ,
418                            const skcms_ICCTag* bXYZ, skcms_Matrix3x3* toXYZ) {
419     return read_tag_xyz(rXYZ, &toXYZ->vals[0][0], &toXYZ->vals[1][0], &toXYZ->vals[2][0]) &&
420            read_tag_xyz(gXYZ, &toXYZ->vals[0][1], &toXYZ->vals[1][1], &toXYZ->vals[2][1]) &&
421            read_tag_xyz(bXYZ, &toXYZ->vals[0][2], &toXYZ->vals[1][2], &toXYZ->vals[2][2]);
422 }
423 
424 typedef struct {
425     uint8_t type          [4];
426     uint8_t reserved_a    [4];
427     uint8_t function_type [2];
428     uint8_t reserved_b    [2];
429     uint8_t variable      [1/*variable*/];  // 1, 3, 4, 5, or 7 s15.16, depending on function_type
430 } para_Layout;
431 
read_curve_para(const uint8_t * buf,uint32_t size,skcms_Curve * curve,uint32_t * curve_size)432 static bool read_curve_para(const uint8_t* buf, uint32_t size,
433                             skcms_Curve* curve, uint32_t* curve_size) {
434     if (size < SAFE_FIXED_SIZE(para_Layout)) {
435         return false;
436     }
437 
438     const para_Layout* paraTag = (const para_Layout*)buf;
439 
440     enum { kG = 0, kGAB = 1, kGABC = 2, kGABCD = 3, kGABCDEF = 4 };
441     uint16_t function_type = read_big_u16(paraTag->function_type);
442     if (function_type > kGABCDEF) {
443         return false;
444     }
445 
446     static const uint32_t curve_bytes[] = { 4, 12, 16, 20, 28 };
447     if (size < SAFE_FIXED_SIZE(para_Layout) + curve_bytes[function_type]) {
448         return false;
449     }
450 
451     if (curve_size) {
452         *curve_size = SAFE_FIXED_SIZE(para_Layout) + curve_bytes[function_type];
453     }
454 
455     curve->table_entries = 0;
456     curve->parametric.a  = 1.0f;
457     curve->parametric.b  = 0.0f;
458     curve->parametric.c  = 0.0f;
459     curve->parametric.d  = 0.0f;
460     curve->parametric.e  = 0.0f;
461     curve->parametric.f  = 0.0f;
462     curve->parametric.g  = read_big_fixed(paraTag->variable);
463 
464     switch (function_type) {
465         case kGAB:
466             curve->parametric.a = read_big_fixed(paraTag->variable + 4);
467             curve->parametric.b = read_big_fixed(paraTag->variable + 8);
468             if (curve->parametric.a == 0) {
469                 return false;
470             }
471             curve->parametric.d = -curve->parametric.b / curve->parametric.a;
472             break;
473         case kGABC:
474             curve->parametric.a = read_big_fixed(paraTag->variable + 4);
475             curve->parametric.b = read_big_fixed(paraTag->variable + 8);
476             curve->parametric.e = read_big_fixed(paraTag->variable + 12);
477             if (curve->parametric.a == 0) {
478                 return false;
479             }
480             curve->parametric.d = -curve->parametric.b / curve->parametric.a;
481             curve->parametric.f = curve->parametric.e;
482             break;
483         case kGABCD:
484             curve->parametric.a = read_big_fixed(paraTag->variable + 4);
485             curve->parametric.b = read_big_fixed(paraTag->variable + 8);
486             curve->parametric.c = read_big_fixed(paraTag->variable + 12);
487             curve->parametric.d = read_big_fixed(paraTag->variable + 16);
488             break;
489         case kGABCDEF:
490             curve->parametric.a = read_big_fixed(paraTag->variable + 4);
491             curve->parametric.b = read_big_fixed(paraTag->variable + 8);
492             curve->parametric.c = read_big_fixed(paraTag->variable + 12);
493             curve->parametric.d = read_big_fixed(paraTag->variable + 16);
494             curve->parametric.e = read_big_fixed(paraTag->variable + 20);
495             curve->parametric.f = read_big_fixed(paraTag->variable + 24);
496             break;
497     }
498     return skcms_TransferFunction_isSRGBish(&curve->parametric);
499 }
500 
501 typedef struct {
502     uint8_t type          [4];
503     uint8_t reserved      [4];
504     uint8_t value_count   [4];
505     uint8_t variable      [1/*variable*/];  // value_count, 8.8 if 1, uint16 (n*65535) if > 1
506 } curv_Layout;
507 
read_curve_curv(const uint8_t * buf,uint32_t size,skcms_Curve * curve,uint32_t * curve_size)508 static bool read_curve_curv(const uint8_t* buf, uint32_t size,
509                             skcms_Curve* curve, uint32_t* curve_size) {
510     if (size < SAFE_FIXED_SIZE(curv_Layout)) {
511         return false;
512     }
513 
514     const curv_Layout* curvTag = (const curv_Layout*)buf;
515 
516     uint32_t value_count = read_big_u32(curvTag->value_count);
517     if (size < SAFE_FIXED_SIZE(curv_Layout) + value_count * SAFE_SIZEOF(uint16_t)) {
518         return false;
519     }
520 
521     if (curve_size) {
522         *curve_size = SAFE_FIXED_SIZE(curv_Layout) + value_count * SAFE_SIZEOF(uint16_t);
523     }
524 
525     if (value_count < 2) {
526         curve->table_entries = 0;
527         curve->parametric.a  = 1.0f;
528         curve->parametric.b  = 0.0f;
529         curve->parametric.c  = 0.0f;
530         curve->parametric.d  = 0.0f;
531         curve->parametric.e  = 0.0f;
532         curve->parametric.f  = 0.0f;
533         if (value_count == 0) {
534             // Empty tables are a shorthand for an identity curve
535             curve->parametric.g = 1.0f;
536         } else {
537             // Single entry tables are a shorthand for simple gamma
538             curve->parametric.g = read_big_u16(curvTag->variable) * (1.0f / 256.0f);
539         }
540     } else {
541         curve->table_8       = nullptr;
542         curve->table_16      = curvTag->variable;
543         curve->table_entries = value_count;
544     }
545 
546     return true;
547 }
548 
549 // Parses both curveType and parametricCurveType data. Ensures that at most 'size' bytes are read.
550 // If curve_size is not nullptr, writes the number of bytes used by the curve in (*curve_size).
read_curve(const uint8_t * buf,uint32_t size,skcms_Curve * curve,uint32_t * curve_size)551 static bool read_curve(const uint8_t* buf, uint32_t size,
552                        skcms_Curve* curve, uint32_t* curve_size) {
553     if (!buf || size < 4 || !curve) {
554         return false;
555     }
556 
557     uint32_t type = read_big_u32(buf);
558     if (type == skcms_Signature_para) {
559         return read_curve_para(buf, size, curve, curve_size);
560     } else if (type == skcms_Signature_curv) {
561         return read_curve_curv(buf, size, curve, curve_size);
562     }
563 
564     return false;
565 }
566 
567 // mft1 and mft2 share a large chunk of data
568 typedef struct {
569     uint8_t type                 [ 4];
570     uint8_t reserved_a           [ 4];
571     uint8_t input_channels       [ 1];
572     uint8_t output_channels      [ 1];
573     uint8_t grid_points          [ 1];
574     uint8_t reserved_b           [ 1];
575     uint8_t matrix               [36];
576 } mft_CommonLayout;
577 
578 typedef struct {
579     mft_CommonLayout common      [1];
580 
581     uint8_t variable             [1/*variable*/];
582 } mft1_Layout;
583 
584 typedef struct {
585     mft_CommonLayout common      [1];
586 
587     uint8_t input_table_entries  [2];
588     uint8_t output_table_entries [2];
589     uint8_t variable             [1/*variable*/];
590 } mft2_Layout;
591 
read_mft_common(const mft_CommonLayout * mftTag,skcms_A2B * a2b)592 static bool read_mft_common(const mft_CommonLayout* mftTag, skcms_A2B* a2b) {
593     // MFT matrices are applied before the first set of curves, but must be identity unless the
594     // input is PCSXYZ. We don't support PCSXYZ profiles, so we ignore this matrix. Note that the
595     // matrix in skcms_A2B is applied later in the pipe, so supporting this would require another
596     // field/flag.
597     a2b->matrix_channels = 0;
598 
599     a2b->input_channels  = mftTag->input_channels[0];
600     a2b->output_channels = mftTag->output_channels[0];
601 
602     // We require exactly three (ie XYZ/Lab/RGB) output channels
603     if (a2b->output_channels != ARRAY_COUNT(a2b->output_curves)) {
604         return false;
605     }
606     // We require at least one, and no more than four (ie CMYK) input channels
607     if (a2b->input_channels < 1 || a2b->input_channels > ARRAY_COUNT(a2b->input_curves)) {
608         return false;
609     }
610 
611     for (uint32_t i = 0; i < a2b->input_channels; ++i) {
612         a2b->grid_points[i] = mftTag->grid_points[0];
613     }
614     // The grid only makes sense with at least two points along each axis
615     if (a2b->grid_points[0] < 2) {
616         return false;
617     }
618 
619     return true;
620 }
621 
init_a2b_tables(const uint8_t * table_base,uint64_t max_tables_len,uint32_t byte_width,uint32_t input_table_entries,uint32_t output_table_entries,skcms_A2B * a2b)622 static bool init_a2b_tables(const uint8_t* table_base, uint64_t max_tables_len, uint32_t byte_width,
623                             uint32_t input_table_entries, uint32_t output_table_entries,
624                             skcms_A2B* a2b) {
625     // byte_width is 1 or 2, [input|output]_table_entries are in [2, 4096], so no overflow
626     uint32_t byte_len_per_input_table  = input_table_entries * byte_width;
627     uint32_t byte_len_per_output_table = output_table_entries * byte_width;
628 
629     // [input|output]_channels are <= 4, so still no overflow
630     uint32_t byte_len_all_input_tables  = a2b->input_channels * byte_len_per_input_table;
631     uint32_t byte_len_all_output_tables = a2b->output_channels * byte_len_per_output_table;
632 
633     uint64_t grid_size = a2b->output_channels * byte_width;
634     for (uint32_t axis = 0; axis < a2b->input_channels; ++axis) {
635         grid_size *= a2b->grid_points[axis];
636     }
637 
638     if (max_tables_len < byte_len_all_input_tables + grid_size + byte_len_all_output_tables) {
639         return false;
640     }
641 
642     for (uint32_t i = 0; i < a2b->input_channels; ++i) {
643         a2b->input_curves[i].table_entries = input_table_entries;
644         if (byte_width == 1) {
645             a2b->input_curves[i].table_8  = table_base + i * byte_len_per_input_table;
646             a2b->input_curves[i].table_16 = nullptr;
647         } else {
648             a2b->input_curves[i].table_8  = nullptr;
649             a2b->input_curves[i].table_16 = table_base + i * byte_len_per_input_table;
650         }
651     }
652 
653     if (byte_width == 1) {
654         a2b->grid_8  = table_base + byte_len_all_input_tables;
655         a2b->grid_16 = nullptr;
656     } else {
657         a2b->grid_8  = nullptr;
658         a2b->grid_16 = table_base + byte_len_all_input_tables;
659     }
660 
661     const uint8_t* output_table_base = table_base + byte_len_all_input_tables + grid_size;
662     for (uint32_t i = 0; i < a2b->output_channels; ++i) {
663         a2b->output_curves[i].table_entries = output_table_entries;
664         if (byte_width == 1) {
665             a2b->output_curves[i].table_8  = output_table_base + i * byte_len_per_output_table;
666             a2b->output_curves[i].table_16 = nullptr;
667         } else {
668             a2b->output_curves[i].table_8  = nullptr;
669             a2b->output_curves[i].table_16 = output_table_base + i * byte_len_per_output_table;
670         }
671     }
672 
673     return true;
674 }
675 
read_tag_mft1(const skcms_ICCTag * tag,skcms_A2B * a2b)676 static bool read_tag_mft1(const skcms_ICCTag* tag, skcms_A2B* a2b) {
677     if (tag->size < SAFE_FIXED_SIZE(mft1_Layout)) {
678         return false;
679     }
680 
681     const mft1_Layout* mftTag = (const mft1_Layout*)tag->buf;
682     if (!read_mft_common(mftTag->common, a2b)) {
683         return false;
684     }
685 
686     uint32_t input_table_entries  = 256;
687     uint32_t output_table_entries = 256;
688 
689     return init_a2b_tables(mftTag->variable, tag->size - SAFE_FIXED_SIZE(mft1_Layout), 1,
690                            input_table_entries, output_table_entries, a2b);
691 }
692 
read_tag_mft2(const skcms_ICCTag * tag,skcms_A2B * a2b)693 static bool read_tag_mft2(const skcms_ICCTag* tag, skcms_A2B* a2b) {
694     if (tag->size < SAFE_FIXED_SIZE(mft2_Layout)) {
695         return false;
696     }
697 
698     const mft2_Layout* mftTag = (const mft2_Layout*)tag->buf;
699     if (!read_mft_common(mftTag->common, a2b)) {
700         return false;
701     }
702 
703     uint32_t input_table_entries = read_big_u16(mftTag->input_table_entries);
704     uint32_t output_table_entries = read_big_u16(mftTag->output_table_entries);
705 
706     // ICC spec mandates that 2 <= table_entries <= 4096
707     if (input_table_entries < 2 || input_table_entries > 4096 ||
708         output_table_entries < 2 || output_table_entries > 4096) {
709         return false;
710     }
711 
712     return init_a2b_tables(mftTag->variable, tag->size - SAFE_FIXED_SIZE(mft2_Layout), 2,
713                            input_table_entries, output_table_entries, a2b);
714 }
715 
read_curves(const uint8_t * buf,uint32_t size,uint32_t curve_offset,uint32_t num_curves,skcms_Curve * curves)716 static bool read_curves(const uint8_t* buf, uint32_t size, uint32_t curve_offset,
717                         uint32_t num_curves, skcms_Curve* curves) {
718     for (uint32_t i = 0; i < num_curves; ++i) {
719         if (curve_offset > size) {
720             return false;
721         }
722 
723         uint32_t curve_bytes;
724         if (!read_curve(buf + curve_offset, size - curve_offset, &curves[i], &curve_bytes)) {
725             return false;
726         }
727 
728         if (curve_bytes > UINT32_MAX - 3) {
729             return false;
730         }
731         curve_bytes = (curve_bytes + 3) & ~3U;
732 
733         uint64_t new_offset_64 = (uint64_t)curve_offset + curve_bytes;
734         curve_offset = (uint32_t)new_offset_64;
735         if (new_offset_64 != curve_offset) {
736             return false;
737         }
738     }
739 
740     return true;
741 }
742 
743 typedef struct {
744     uint8_t type                 [ 4];
745     uint8_t reserved_a           [ 4];
746     uint8_t input_channels       [ 1];
747     uint8_t output_channels      [ 1];
748     uint8_t reserved_b           [ 2];
749     uint8_t b_curve_offset       [ 4];
750     uint8_t matrix_offset        [ 4];
751     uint8_t m_curve_offset       [ 4];
752     uint8_t clut_offset          [ 4];
753     uint8_t a_curve_offset       [ 4];
754 } mAB_Layout;
755 
756 typedef struct {
757     uint8_t grid_points          [16];
758     uint8_t grid_byte_width      [ 1];
759     uint8_t reserved             [ 3];
760     uint8_t variable             [1/*variable*/];
761 } mABCLUT_Layout;
762 
read_tag_mab(const skcms_ICCTag * tag,skcms_A2B * a2b,bool pcs_is_xyz)763 static bool read_tag_mab(const skcms_ICCTag* tag, skcms_A2B* a2b, bool pcs_is_xyz) {
764     if (tag->size < SAFE_SIZEOF(mAB_Layout)) {
765         return false;
766     }
767 
768     const mAB_Layout* mABTag = (const mAB_Layout*)tag->buf;
769 
770     a2b->input_channels  = mABTag->input_channels[0];
771     a2b->output_channels = mABTag->output_channels[0];
772 
773     // We require exactly three (ie XYZ/Lab/RGB) output channels
774     if (a2b->output_channels != ARRAY_COUNT(a2b->output_curves)) {
775         return false;
776     }
777     // We require no more than four (ie CMYK) input channels
778     if (a2b->input_channels > ARRAY_COUNT(a2b->input_curves)) {
779         return false;
780     }
781 
782     uint32_t b_curve_offset = read_big_u32(mABTag->b_curve_offset);
783     uint32_t matrix_offset  = read_big_u32(mABTag->matrix_offset);
784     uint32_t m_curve_offset = read_big_u32(mABTag->m_curve_offset);
785     uint32_t clut_offset    = read_big_u32(mABTag->clut_offset);
786     uint32_t a_curve_offset = read_big_u32(mABTag->a_curve_offset);
787 
788     // "B" curves must be present
789     if (0 == b_curve_offset) {
790         return false;
791     }
792 
793     if (!read_curves(tag->buf, tag->size, b_curve_offset, a2b->output_channels,
794                      a2b->output_curves)) {
795         return false;
796     }
797 
798     // "M" curves and Matrix must be used together
799     if (0 != m_curve_offset) {
800         if (0 == matrix_offset) {
801             return false;
802         }
803         a2b->matrix_channels = a2b->output_channels;
804         if (!read_curves(tag->buf, tag->size, m_curve_offset, a2b->matrix_channels,
805                          a2b->matrix_curves)) {
806             return false;
807         }
808 
809         // Read matrix, which is stored as a row-major 3x3, followed by the fourth column
810         if (tag->size < matrix_offset + 12 * SAFE_SIZEOF(uint32_t)) {
811             return false;
812         }
813         float encoding_factor = pcs_is_xyz ? 65535 / 32768.0f : 1.0f;
814         const uint8_t* mtx_buf = tag->buf + matrix_offset;
815         a2b->matrix.vals[0][0] = encoding_factor * read_big_fixed(mtx_buf + 0);
816         a2b->matrix.vals[0][1] = encoding_factor * read_big_fixed(mtx_buf + 4);
817         a2b->matrix.vals[0][2] = encoding_factor * read_big_fixed(mtx_buf + 8);
818         a2b->matrix.vals[1][0] = encoding_factor * read_big_fixed(mtx_buf + 12);
819         a2b->matrix.vals[1][1] = encoding_factor * read_big_fixed(mtx_buf + 16);
820         a2b->matrix.vals[1][2] = encoding_factor * read_big_fixed(mtx_buf + 20);
821         a2b->matrix.vals[2][0] = encoding_factor * read_big_fixed(mtx_buf + 24);
822         a2b->matrix.vals[2][1] = encoding_factor * read_big_fixed(mtx_buf + 28);
823         a2b->matrix.vals[2][2] = encoding_factor * read_big_fixed(mtx_buf + 32);
824         a2b->matrix.vals[0][3] = encoding_factor * read_big_fixed(mtx_buf + 36);
825         a2b->matrix.vals[1][3] = encoding_factor * read_big_fixed(mtx_buf + 40);
826         a2b->matrix.vals[2][3] = encoding_factor * read_big_fixed(mtx_buf + 44);
827     } else {
828         if (0 != matrix_offset) {
829             return false;
830         }
831         a2b->matrix_channels = 0;
832     }
833 
834     // "A" curves and CLUT must be used together
835     if (0 != a_curve_offset) {
836         if (0 == clut_offset) {
837             return false;
838         }
839         if (!read_curves(tag->buf, tag->size, a_curve_offset, a2b->input_channels,
840                          a2b->input_curves)) {
841             return false;
842         }
843 
844         if (tag->size < clut_offset + SAFE_FIXED_SIZE(mABCLUT_Layout)) {
845             return false;
846         }
847         const mABCLUT_Layout* clut = (const mABCLUT_Layout*)(tag->buf + clut_offset);
848 
849         if (clut->grid_byte_width[0] == 1) {
850             a2b->grid_8  = clut->variable;
851             a2b->grid_16 = nullptr;
852         } else if (clut->grid_byte_width[0] == 2) {
853             a2b->grid_8  = nullptr;
854             a2b->grid_16 = clut->variable;
855         } else {
856             return false;
857         }
858 
859         uint64_t grid_size = a2b->output_channels * clut->grid_byte_width[0];
860         for (uint32_t i = 0; i < a2b->input_channels; ++i) {
861             a2b->grid_points[i] = clut->grid_points[i];
862             // The grid only makes sense with at least two points along each axis
863             if (a2b->grid_points[i] < 2) {
864                 return false;
865             }
866             grid_size *= a2b->grid_points[i];
867         }
868         if (tag->size < clut_offset + SAFE_FIXED_SIZE(mABCLUT_Layout) + grid_size) {
869             return false;
870         }
871     } else {
872         if (0 != clut_offset) {
873             return false;
874         }
875 
876         // If there is no CLUT, the number of input and output channels must match
877         if (a2b->input_channels != a2b->output_channels) {
878             return false;
879         }
880 
881         // Zero out the number of input channels to signal that we're skipping this stage
882         a2b->input_channels = 0;
883     }
884 
885     return true;
886 }
887 
888 // If you pass f, we'll fit a possibly-non-zero value for *f.
889 // If you pass nullptr, we'll assume you want *f to be treated as zero.
fit_linear(const skcms_Curve * curve,int N,float tol,float * c,float * d,float * f=nullptr)890 static int fit_linear(const skcms_Curve* curve, int N, float tol,
891                       float* c, float* d, float* f = nullptr) {
892     assert(N > 1);
893     // We iteratively fit the first points to the TF's linear piece.
894     // We want the cx + f line to pass through the first and last points we fit exactly.
895     //
896     // As we walk along the points we find the minimum and maximum slope of the line before the
897     // error would exceed our tolerance.  We stop when the range [slope_min, slope_max] becomes
898     // emtpy, when we definitely can't add any more points.
899     //
900     // Some points' error intervals may intersect the running interval but not lie fully
901     // within it.  So we keep track of the last point we saw that is a valid end point candidate,
902     // and once the search is done, back up to build the line through *that* point.
903     const float dx = 1.0f / (N - 1);
904 
905     int lin_points = 1;
906 
907     float f_zero = 0.0f;
908     if (f) {
909         *f = eval_curve(curve, 0);
910     } else {
911         f = &f_zero;
912     }
913 
914 
915     float slope_min = -INFINITY_;
916     float slope_max = +INFINITY_;
917     for (int i = 1; i < N; ++i) {
918         float x = i * dx;
919         float y = eval_curve(curve, x);
920 
921         float slope_max_i = (y + tol - *f) / x,
922               slope_min_i = (y - tol - *f) / x;
923         if (slope_max_i < slope_min || slope_max < slope_min_i) {
924             // Slope intervals would no longer overlap.
925             break;
926         }
927         slope_max = fminf_(slope_max, slope_max_i);
928         slope_min = fmaxf_(slope_min, slope_min_i);
929 
930         float cur_slope = (y - *f) / x;
931         if (slope_min <= cur_slope && cur_slope <= slope_max) {
932             lin_points = i + 1;
933             *c = cur_slope;
934         }
935     }
936 
937     // Set D to the last point that met our tolerance.
938     *d = (lin_points - 1) * dx;
939     return lin_points;
940 }
941 
read_a2b(const skcms_ICCTag * tag,skcms_A2B * a2b,bool pcs_is_xyz)942 static bool read_a2b(const skcms_ICCTag* tag, skcms_A2B* a2b, bool pcs_is_xyz) {
943     bool ok = false;
944     if (tag->type == skcms_Signature_mft1) {
945         ok = read_tag_mft1(tag, a2b);
946     } else if (tag->type == skcms_Signature_mft2) {
947         ok = read_tag_mft2(tag, a2b);
948     } else if (tag->type == skcms_Signature_mAB) {
949         ok = read_tag_mab(tag, a2b, pcs_is_xyz);
950     }
951     if (!ok) {
952         return false;
953     }
954 
955     // Detect and canonicalize identity tables.
956     skcms_Curve* curves[] = {
957         a2b->input_channels  > 0 ? a2b->input_curves  + 0 : nullptr,
958         a2b->input_channels  > 1 ? a2b->input_curves  + 1 : nullptr,
959         a2b->input_channels  > 2 ? a2b->input_curves  + 2 : nullptr,
960         a2b->input_channels  > 3 ? a2b->input_curves  + 3 : nullptr,
961         a2b->matrix_channels > 0 ? a2b->matrix_curves + 0 : nullptr,
962         a2b->matrix_channels > 1 ? a2b->matrix_curves + 1 : nullptr,
963         a2b->matrix_channels > 2 ? a2b->matrix_curves + 2 : nullptr,
964         a2b->output_channels > 0 ? a2b->output_curves + 0 : nullptr,
965         a2b->output_channels > 1 ? a2b->output_curves + 1 : nullptr,
966         a2b->output_channels > 2 ? a2b->output_curves + 2 : nullptr,
967     };
968 
969     for (int i = 0; i < ARRAY_COUNT(curves); i++) {
970         skcms_Curve* curve = curves[i];
971 
972         if (curve && curve->table_entries && curve->table_entries <= (uint32_t)INT_MAX) {
973             int N = (int)curve->table_entries;
974 
975             float c = 0.0f, d = 0.0f, f = 0.0f;
976             if (N == fit_linear(curve, N, 1.0f/(2*N), &c,&d,&f)
977                 && c == 1.0f
978                 && f == 0.0f) {
979                 curve->table_entries = 0;
980                 curve->table_8       = nullptr;
981                 curve->table_16      = nullptr;
982                 curve->parametric    = skcms_TransferFunction{1,1,0,0,0,0,0};
983             }
984         }
985     }
986 
987     return true;
988 }
989 
skcms_GetTagByIndex(const skcms_ICCProfile * profile,uint32_t idx,skcms_ICCTag * tag)990 void skcms_GetTagByIndex(const skcms_ICCProfile* profile, uint32_t idx, skcms_ICCTag* tag) {
991     if (!profile || !profile->buffer || !tag) { return; }
992     if (idx > profile->tag_count) { return; }
993     const tag_Layout* tags = get_tag_table(profile);
994     tag->signature = read_big_u32(tags[idx].signature);
995     tag->size      = read_big_u32(tags[idx].size);
996     tag->buf       = read_big_u32(tags[idx].offset) + profile->buffer;
997     tag->type      = read_big_u32(tag->buf);
998 }
999 
skcms_GetTagBySignature(const skcms_ICCProfile * profile,uint32_t sig,skcms_ICCTag * tag)1000 bool skcms_GetTagBySignature(const skcms_ICCProfile* profile, uint32_t sig, skcms_ICCTag* tag) {
1001     if (!profile || !profile->buffer || !tag) { return false; }
1002     const tag_Layout* tags = get_tag_table(profile);
1003     for (uint32_t i = 0; i < profile->tag_count; ++i) {
1004         if (read_big_u32(tags[i].signature) == sig) {
1005             tag->signature = sig;
1006             tag->size      = read_big_u32(tags[i].size);
1007             tag->buf       = read_big_u32(tags[i].offset) + profile->buffer;
1008             tag->type      = read_big_u32(tag->buf);
1009             return true;
1010         }
1011     }
1012     return false;
1013 }
1014 
usable_as_src(const skcms_ICCProfile * profile)1015 static bool usable_as_src(const skcms_ICCProfile* profile) {
1016     return profile->has_A2B
1017        || (profile->has_trc && profile->has_toXYZD50);
1018 }
1019 
skcms_Parse(const void * buf,size_t len,skcms_ICCProfile * profile)1020 bool skcms_Parse(const void* buf, size_t len, skcms_ICCProfile* profile) {
1021     assert(SAFE_SIZEOF(header_Layout) == 132);
1022 
1023     if (!profile) {
1024         return false;
1025     }
1026     memset(profile, 0, SAFE_SIZEOF(*profile));
1027 
1028     if (len < SAFE_SIZEOF(header_Layout)) {
1029         return false;
1030     }
1031 
1032     // Byte-swap all header fields
1033     const header_Layout* header  = (const header_Layout*)buf;
1034     profile->buffer              = (const uint8_t*)buf;
1035     profile->size                = read_big_u32(header->size);
1036     uint32_t version             = read_big_u32(header->version);
1037     profile->data_color_space    = read_big_u32(header->data_color_space);
1038     profile->pcs                 = read_big_u32(header->pcs);
1039     uint32_t signature           = read_big_u32(header->signature);
1040     float illuminant_X           = read_big_fixed(header->illuminant_X);
1041     float illuminant_Y           = read_big_fixed(header->illuminant_Y);
1042     float illuminant_Z           = read_big_fixed(header->illuminant_Z);
1043     profile->tag_count           = read_big_u32(header->tag_count);
1044 
1045     // Validate signature, size (smaller than buffer, large enough to hold tag table),
1046     // and major version
1047     uint64_t tag_table_size = profile->tag_count * SAFE_SIZEOF(tag_Layout);
1048     if (signature != skcms_Signature_acsp ||
1049         profile->size > len ||
1050         profile->size < SAFE_SIZEOF(header_Layout) + tag_table_size ||
1051         (version >> 24) > 4) {
1052         return false;
1053     }
1054 
1055     // Validate that illuminant is D50 white
1056     if (fabsf_(illuminant_X - 0.9642f) > 0.0100f ||
1057         fabsf_(illuminant_Y - 1.0000f) > 0.0100f ||
1058         fabsf_(illuminant_Z - 0.8249f) > 0.0100f) {
1059         return false;
1060     }
1061 
1062     // Validate that all tag entries have sane offset + size
1063     const tag_Layout* tags = get_tag_table(profile);
1064     for (uint32_t i = 0; i < profile->tag_count; ++i) {
1065         uint32_t tag_offset = read_big_u32(tags[i].offset);
1066         uint32_t tag_size   = read_big_u32(tags[i].size);
1067         uint64_t tag_end    = (uint64_t)tag_offset + (uint64_t)tag_size;
1068         if (tag_size < 4 || tag_end > profile->size) {
1069             return false;
1070         }
1071     }
1072 
1073     if (profile->pcs != skcms_Signature_XYZ && profile->pcs != skcms_Signature_Lab) {
1074         return false;
1075     }
1076 
1077     bool pcs_is_xyz = profile->pcs == skcms_Signature_XYZ;
1078 
1079     // Pre-parse commonly used tags.
1080     skcms_ICCTag kTRC;
1081     if (profile->data_color_space == skcms_Signature_Gray &&
1082         skcms_GetTagBySignature(profile, skcms_Signature_kTRC, &kTRC)) {
1083         if (!read_curve(kTRC.buf, kTRC.size, &profile->trc[0], nullptr)) {
1084             // Malformed tag
1085             return false;
1086         }
1087         profile->trc[1] = profile->trc[0];
1088         profile->trc[2] = profile->trc[0];
1089         profile->has_trc = true;
1090 
1091         if (pcs_is_xyz) {
1092             profile->toXYZD50.vals[0][0] = illuminant_X;
1093             profile->toXYZD50.vals[1][1] = illuminant_Y;
1094             profile->toXYZD50.vals[2][2] = illuminant_Z;
1095             profile->has_toXYZD50 = true;
1096         }
1097     } else {
1098         skcms_ICCTag rTRC, gTRC, bTRC;
1099         if (skcms_GetTagBySignature(profile, skcms_Signature_rTRC, &rTRC) &&
1100             skcms_GetTagBySignature(profile, skcms_Signature_gTRC, &gTRC) &&
1101             skcms_GetTagBySignature(profile, skcms_Signature_bTRC, &bTRC)) {
1102             if (!read_curve(rTRC.buf, rTRC.size, &profile->trc[0], nullptr) ||
1103                 !read_curve(gTRC.buf, gTRC.size, &profile->trc[1], nullptr) ||
1104                 !read_curve(bTRC.buf, bTRC.size, &profile->trc[2], nullptr)) {
1105                 // Malformed TRC tags
1106                 return false;
1107             }
1108             profile->has_trc = true;
1109         }
1110 
1111         skcms_ICCTag rXYZ, gXYZ, bXYZ;
1112         if (skcms_GetTagBySignature(profile, skcms_Signature_rXYZ, &rXYZ) &&
1113             skcms_GetTagBySignature(profile, skcms_Signature_gXYZ, &gXYZ) &&
1114             skcms_GetTagBySignature(profile, skcms_Signature_bXYZ, &bXYZ)) {
1115             if (!read_to_XYZD50(&rXYZ, &gXYZ, &bXYZ, &profile->toXYZD50)) {
1116                 // Malformed XYZ tags
1117                 return false;
1118             }
1119             profile->has_toXYZD50 = true;
1120         }
1121     }
1122 
1123     skcms_ICCTag a2b_tag;
1124 
1125     // For now, we're preferring A2B0, like Skia does and the ICC spec tells us to.
1126     // TODO: prefer A2B1 (relative colormetric) over A2B0 (perceptual)?
1127     // This breaks with the ICC spec, but we think it's a good idea, given that TRC curves
1128     // and all our known users are thinking exclusively in terms of relative colormetric.
1129     const uint32_t sigs[] = { skcms_Signature_A2B0, skcms_Signature_A2B1 };
1130     for (int i = 0; i < ARRAY_COUNT(sigs); i++) {
1131         if (skcms_GetTagBySignature(profile, sigs[i], &a2b_tag)) {
1132             if (!read_a2b(&a2b_tag, &profile->A2B, pcs_is_xyz)) {
1133                 // Malformed A2B tag
1134                 return false;
1135             }
1136             profile->has_A2B = true;
1137             break;
1138         }
1139     }
1140 
1141     return usable_as_src(profile);
1142 }
1143 
1144 
skcms_sRGB_profile()1145 const skcms_ICCProfile* skcms_sRGB_profile() {
1146     static const skcms_ICCProfile sRGB_profile = {
1147         nullptr,               // buffer, moot here
1148 
1149         0,                     // size, moot here
1150         skcms_Signature_RGB,   // data_color_space
1151         skcms_Signature_XYZ,   // pcs
1152         0,                     // tag count, moot here
1153 
1154         // We choose to represent sRGB with its canonical transfer function,
1155         // and with its canonical XYZD50 gamut matrix.
1156         true,  // has_trc, followed by the 3 trc curves
1157         {
1158             {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
1159             {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
1160             {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
1161         },
1162 
1163         true,  // has_toXYZD50, followed by 3x3 toXYZD50 matrix
1164         {{
1165             { 0.436065674f, 0.385147095f, 0.143066406f },
1166             { 0.222488403f, 0.716873169f, 0.060607910f },
1167             { 0.013916016f, 0.097076416f, 0.714096069f },
1168         }},
1169 
1170         false, // has_A2B, followed by a2b itself which we don't care about.
1171         {
1172             0,
1173             {
1174                 {{0, {0,0, 0,0,0,0,0}}},
1175                 {{0, {0,0, 0,0,0,0,0}}},
1176                 {{0, {0,0, 0,0,0,0,0}}},
1177                 {{0, {0,0, 0,0,0,0,0}}},
1178             },
1179             {0,0,0,0},
1180             nullptr,
1181             nullptr,
1182 
1183             0,
1184             {
1185                 {{0, {0,0, 0,0,0,0,0}}},
1186                 {{0, {0,0, 0,0,0,0,0}}},
1187                 {{0, {0,0, 0,0,0,0,0}}},
1188             },
1189             {{
1190                 { 0,0,0,0 },
1191                 { 0,0,0,0 },
1192                 { 0,0,0,0 },
1193             }},
1194 
1195             0,
1196             {
1197                 {{0, {0,0, 0,0,0,0,0}}},
1198                 {{0, {0,0, 0,0,0,0,0}}},
1199                 {{0, {0,0, 0,0,0,0,0}}},
1200             },
1201         },
1202     };
1203     return &sRGB_profile;
1204 }
1205 
skcms_XYZD50_profile()1206 const skcms_ICCProfile* skcms_XYZD50_profile() {
1207     // Just like sRGB above, but with identity transfer functions and toXYZD50 matrix.
1208     static const skcms_ICCProfile XYZD50_profile = {
1209         nullptr,               // buffer, moot here
1210 
1211         0,                     // size, moot here
1212         skcms_Signature_RGB,   // data_color_space
1213         skcms_Signature_XYZ,   // pcs
1214         0,                     // tag count, moot here
1215 
1216         true,  // has_trc, followed by the 3 trc curves
1217         {
1218             {{0, {1,1, 0,0,0,0,0}}},
1219             {{0, {1,1, 0,0,0,0,0}}},
1220             {{0, {1,1, 0,0,0,0,0}}},
1221         },
1222 
1223         true,  // has_toXYZD50, followed by 3x3 toXYZD50 matrix
1224         {{
1225             { 1,0,0 },
1226             { 0,1,0 },
1227             { 0,0,1 },
1228         }},
1229 
1230         false, // has_A2B, followed by a2b itself which we don't care about.
1231         {
1232             0,
1233             {
1234                 {{0, {0,0, 0,0,0,0,0}}},
1235                 {{0, {0,0, 0,0,0,0,0}}},
1236                 {{0, {0,0, 0,0,0,0,0}}},
1237                 {{0, {0,0, 0,0,0,0,0}}},
1238             },
1239             {0,0,0,0},
1240             nullptr,
1241             nullptr,
1242 
1243             0,
1244             {
1245                 {{0, {0,0, 0,0,0,0,0}}},
1246                 {{0, {0,0, 0,0,0,0,0}}},
1247                 {{0, {0,0, 0,0,0,0,0}}},
1248             },
1249             {{
1250                 { 0,0,0,0 },
1251                 { 0,0,0,0 },
1252                 { 0,0,0,0 },
1253             }},
1254 
1255             0,
1256             {
1257                 {{0, {0,0, 0,0,0,0,0}}},
1258                 {{0, {0,0, 0,0,0,0,0}}},
1259                 {{0, {0,0, 0,0,0,0,0}}},
1260             },
1261         },
1262     };
1263 
1264     return &XYZD50_profile;
1265 }
1266 
skcms_sRGB_TransferFunction()1267 const skcms_TransferFunction* skcms_sRGB_TransferFunction() {
1268     return &skcms_sRGB_profile()->trc[0].parametric;
1269 }
1270 
skcms_sRGB_Inverse_TransferFunction()1271 const skcms_TransferFunction* skcms_sRGB_Inverse_TransferFunction() {
1272     static const skcms_TransferFunction sRGB_inv =
1273         {0.416666657f, 1.137283325f, -0.0f, 12.920000076f, 0.003130805f, -0.054969788f, -0.0f};
1274     return &sRGB_inv;
1275 }
1276 
skcms_Identity_TransferFunction()1277 const skcms_TransferFunction* skcms_Identity_TransferFunction() {
1278     static const skcms_TransferFunction identity = {1,1,0,0,0,0,0};
1279     return &identity;
1280 }
1281 
1282 const uint8_t skcms_252_random_bytes[] = {
1283     8, 179, 128, 204, 253, 38, 134, 184, 68, 102, 32, 138, 99, 39, 169, 215,
1284     119, 26, 3, 223, 95, 239, 52, 132, 114, 74, 81, 234, 97, 116, 244, 205, 30,
1285     154, 173, 12, 51, 159, 122, 153, 61, 226, 236, 178, 229, 55, 181, 220, 191,
1286     194, 160, 126, 168, 82, 131, 18, 180, 245, 163, 22, 246, 69, 235, 252, 57,
1287     108, 14, 6, 152, 240, 255, 171, 242, 20, 227, 177, 238, 96, 85, 16, 211,
1288     70, 200, 149, 155, 146, 127, 145, 100, 151, 109, 19, 165, 208, 195, 164,
1289     137, 254, 182, 248, 64, 201, 45, 209, 5, 147, 207, 210, 113, 162, 83, 225,
1290     9, 31, 15, 231, 115, 37, 58, 53, 24, 49, 197, 56, 120, 172, 48, 21, 214,
1291     129, 111, 11, 50, 187, 196, 34, 60, 103, 71, 144, 47, 203, 77, 80, 232,
1292     140, 222, 250, 206, 166, 247, 139, 249, 221, 72, 106, 27, 199, 117, 54,
1293     219, 135, 118, 40, 79, 41, 251, 46, 93, 212, 92, 233, 148, 28, 121, 63,
1294     123, 158, 105, 59, 29, 42, 143, 23, 0, 107, 176, 87, 104, 183, 156, 193,
1295     189, 90, 188, 65, 190, 17, 198, 7, 186, 161, 1, 124, 78, 125, 170, 133,
1296     174, 218, 67, 157, 75, 101, 89, 217, 62, 33, 141, 228, 25, 35, 91, 230, 4,
1297     2, 13, 73, 86, 167, 237, 84, 243, 44, 185, 66, 130, 110, 150, 142, 216, 88,
1298     112, 36, 224, 136, 202, 76, 94, 98, 175, 213
1299 };
1300 
skcms_ApproximatelyEqualProfiles(const skcms_ICCProfile * A,const skcms_ICCProfile * B)1301 bool skcms_ApproximatelyEqualProfiles(const skcms_ICCProfile* A, const skcms_ICCProfile* B) {
1302     // Test for exactly equal profiles first.
1303     if (A == B || 0 == memcmp(A,B, sizeof(skcms_ICCProfile))) {
1304         return true;
1305     }
1306 
1307     // For now this is the essentially the same strategy we use in test_only.c
1308     // for our skcms_Transform() smoke tests:
1309     //    1) transform A to XYZD50
1310     //    2) transform B to XYZD50
1311     //    3) return true if they're similar enough
1312     // Our current criterion in 3) is maximum 1 bit error per XYZD50 byte.
1313 
1314     // skcms_252_random_bytes are 252 of a random shuffle of all possible bytes.
1315     // 252 is evenly divisible by 3 and 4.  Only 192, 10, 241, and 43 are missing.
1316 
1317     // We want to allow otherwise equivalent profiles tagged as grayscale and RGB
1318     // to be treated as equal.  But CMYK profiles are a totally different ballgame.
1319     const auto CMYK = skcms_Signature_CMYK;
1320     if ((A->data_color_space == CMYK) != (B->data_color_space == CMYK)) {
1321         return false;
1322     }
1323 
1324     // Interpret as RGB_888 if data color space is RGB or GRAY, RGBA_8888 if CMYK.
1325     // TODO: working with RGBA_8888 either way is probably fastest.
1326     skcms_PixelFormat fmt = skcms_PixelFormat_RGB_888;
1327     size_t npixels = 84;
1328     if (A->data_color_space == skcms_Signature_CMYK) {
1329         fmt = skcms_PixelFormat_RGBA_8888;
1330         npixels = 63;
1331     }
1332 
1333     // TODO: if A or B is a known profile (skcms_sRGB_profile, skcms_XYZD50_profile),
1334     // use pre-canned results and skip that skcms_Transform() call?
1335     uint8_t dstA[252],
1336             dstB[252];
1337     if (!skcms_Transform(
1338                 skcms_252_random_bytes,     fmt, skcms_AlphaFormat_Unpremul, A,
1339                 dstA, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, skcms_XYZD50_profile(),
1340                 npixels)) {
1341         return false;
1342     }
1343     if (!skcms_Transform(
1344                 skcms_252_random_bytes,     fmt, skcms_AlphaFormat_Unpremul, B,
1345                 dstB, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, skcms_XYZD50_profile(),
1346                 npixels)) {
1347         return false;
1348     }
1349 
1350     // TODO: make sure this final check has reasonable codegen.
1351     for (size_t i = 0; i < 252; i++) {
1352         if (abs((int)dstA[i] - (int)dstB[i]) > 1) {
1353             return false;
1354         }
1355     }
1356     return true;
1357 }
1358 
skcms_TRCs_AreApproximateInverse(const skcms_ICCProfile * profile,const skcms_TransferFunction * inv_tf)1359 bool skcms_TRCs_AreApproximateInverse(const skcms_ICCProfile* profile,
1360                                       const skcms_TransferFunction* inv_tf) {
1361     if (!profile || !profile->has_trc) {
1362         return false;
1363     }
1364 
1365     return skcms_AreApproximateInverses(&profile->trc[0], inv_tf) &&
1366            skcms_AreApproximateInverses(&profile->trc[1], inv_tf) &&
1367            skcms_AreApproximateInverses(&profile->trc[2], inv_tf);
1368 }
1369 
is_zero_to_one(float x)1370 static bool is_zero_to_one(float x) {
1371     return 0 <= x && x <= 1;
1372 }
1373 
1374 typedef struct { float vals[3]; } skcms_Vector3;
1375 
mv_mul(const skcms_Matrix3x3 * m,const skcms_Vector3 * v)1376 static skcms_Vector3 mv_mul(const skcms_Matrix3x3* m, const skcms_Vector3* v) {
1377     skcms_Vector3 dst = {{0,0,0}};
1378     for (int row = 0; row < 3; ++row) {
1379         dst.vals[row] = m->vals[row][0] * v->vals[0]
1380                       + m->vals[row][1] * v->vals[1]
1381                       + m->vals[row][2] * v->vals[2];
1382     }
1383     return dst;
1384 }
1385 
skcms_AdaptToXYZD50(float wx,float wy,skcms_Matrix3x3 * toXYZD50)1386 bool skcms_AdaptToXYZD50(float wx, float wy,
1387                          skcms_Matrix3x3* toXYZD50) {
1388     if (!is_zero_to_one(wx) || !is_zero_to_one(wy) ||
1389         !toXYZD50) {
1390         return false;
1391     }
1392 
1393     // Assumes that Y is 1.0f.
1394     skcms_Vector3 wXYZ = { { wx / wy, 1, (1 - wx - wy) / wy } };
1395 
1396     // Now convert toXYZ matrix to toXYZD50.
1397     skcms_Vector3 wXYZD50 = { { 0.96422f, 1.0f, 0.82521f } };
1398 
1399     // Calculate the chromatic adaptation matrix.  We will use the Bradford method, thus
1400     // the matrices below.  The Bradford method is used by Adobe and is widely considered
1401     // to be the best.
1402     skcms_Matrix3x3 xyz_to_lms = {{
1403         {  0.8951f,  0.2664f, -0.1614f },
1404         { -0.7502f,  1.7135f,  0.0367f },
1405         {  0.0389f, -0.0685f,  1.0296f },
1406     }};
1407     skcms_Matrix3x3 lms_to_xyz = {{
1408         {  0.9869929f, -0.1470543f, 0.1599627f },
1409         {  0.4323053f,  0.5183603f, 0.0492912f },
1410         { -0.0085287f,  0.0400428f, 0.9684867f },
1411     }};
1412 
1413     skcms_Vector3 srcCone = mv_mul(&xyz_to_lms, &wXYZ);
1414     skcms_Vector3 dstCone = mv_mul(&xyz_to_lms, &wXYZD50);
1415 
1416     *toXYZD50 = {{
1417         { dstCone.vals[0] / srcCone.vals[0], 0, 0 },
1418         { 0, dstCone.vals[1] / srcCone.vals[1], 0 },
1419         { 0, 0, dstCone.vals[2] / srcCone.vals[2] },
1420     }};
1421     *toXYZD50 = skcms_Matrix3x3_concat(toXYZD50, &xyz_to_lms);
1422     *toXYZD50 = skcms_Matrix3x3_concat(&lms_to_xyz, toXYZD50);
1423 
1424     return true;
1425 }
1426 
skcms_PrimariesToXYZD50(float rx,float ry,float gx,float gy,float bx,float by,float wx,float wy,skcms_Matrix3x3 * toXYZD50)1427 bool skcms_PrimariesToXYZD50(float rx, float ry,
1428                              float gx, float gy,
1429                              float bx, float by,
1430                              float wx, float wy,
1431                              skcms_Matrix3x3* toXYZD50) {
1432     if (!is_zero_to_one(rx) || !is_zero_to_one(ry) ||
1433         !is_zero_to_one(gx) || !is_zero_to_one(gy) ||
1434         !is_zero_to_one(bx) || !is_zero_to_one(by) ||
1435         !is_zero_to_one(wx) || !is_zero_to_one(wy) ||
1436         !toXYZD50) {
1437         return false;
1438     }
1439 
1440     // First, we need to convert xy values (primaries) to XYZ.
1441     skcms_Matrix3x3 primaries = {{
1442         { rx, gx, bx },
1443         { ry, gy, by },
1444         { 1 - rx - ry, 1 - gx - gy, 1 - bx - by },
1445     }};
1446     skcms_Matrix3x3 primaries_inv;
1447     if (!skcms_Matrix3x3_invert(&primaries, &primaries_inv)) {
1448         return false;
1449     }
1450 
1451     // Assumes that Y is 1.0f.
1452     skcms_Vector3 wXYZ = { { wx / wy, 1, (1 - wx - wy) / wy } };
1453     skcms_Vector3 XYZ = mv_mul(&primaries_inv, &wXYZ);
1454 
1455     skcms_Matrix3x3 toXYZ = {{
1456         { XYZ.vals[0],           0,           0 },
1457         {           0, XYZ.vals[1],           0 },
1458         {           0,           0, XYZ.vals[2] },
1459     }};
1460     toXYZ = skcms_Matrix3x3_concat(&primaries, &toXYZ);
1461 
1462     skcms_Matrix3x3 DXtoD50;
1463     if (!skcms_AdaptToXYZD50(wx, wy, &DXtoD50)) {
1464         return false;
1465     }
1466 
1467     *toXYZD50 = skcms_Matrix3x3_concat(&DXtoD50, &toXYZ);
1468     return true;
1469 }
1470 
1471 
skcms_Matrix3x3_invert(const skcms_Matrix3x3 * src,skcms_Matrix3x3 * dst)1472 bool skcms_Matrix3x3_invert(const skcms_Matrix3x3* src, skcms_Matrix3x3* dst) {
1473     double a00 = src->vals[0][0],
1474            a01 = src->vals[1][0],
1475            a02 = src->vals[2][0],
1476            a10 = src->vals[0][1],
1477            a11 = src->vals[1][1],
1478            a12 = src->vals[2][1],
1479            a20 = src->vals[0][2],
1480            a21 = src->vals[1][2],
1481            a22 = src->vals[2][2];
1482 
1483     double b0 = a00*a11 - a01*a10,
1484            b1 = a00*a12 - a02*a10,
1485            b2 = a01*a12 - a02*a11,
1486            b3 = a20,
1487            b4 = a21,
1488            b5 = a22;
1489 
1490     double determinant = b0*b5
1491                        - b1*b4
1492                        + b2*b3;
1493 
1494     if (determinant == 0) {
1495         return false;
1496     }
1497 
1498     double invdet = 1.0 / determinant;
1499     if (invdet > +FLT_MAX || invdet < -FLT_MAX || !isfinitef_((float)invdet)) {
1500         return false;
1501     }
1502 
1503     b0 *= invdet;
1504     b1 *= invdet;
1505     b2 *= invdet;
1506     b3 *= invdet;
1507     b4 *= invdet;
1508     b5 *= invdet;
1509 
1510     dst->vals[0][0] = (float)( a11*b5 - a12*b4 );
1511     dst->vals[1][0] = (float)( a02*b4 - a01*b5 );
1512     dst->vals[2][0] = (float)(        +     b2 );
1513     dst->vals[0][1] = (float)( a12*b3 - a10*b5 );
1514     dst->vals[1][1] = (float)( a00*b5 - a02*b3 );
1515     dst->vals[2][1] = (float)(        -     b1 );
1516     dst->vals[0][2] = (float)( a10*b4 - a11*b3 );
1517     dst->vals[1][2] = (float)( a01*b3 - a00*b4 );
1518     dst->vals[2][2] = (float)(        +     b0 );
1519 
1520     for (int r = 0; r < 3; ++r)
1521     for (int c = 0; c < 3; ++c) {
1522         if (!isfinitef_(dst->vals[r][c])) {
1523             return false;
1524         }
1525     }
1526     return true;
1527 }
1528 
skcms_Matrix3x3_concat(const skcms_Matrix3x3 * A,const skcms_Matrix3x3 * B)1529 skcms_Matrix3x3 skcms_Matrix3x3_concat(const skcms_Matrix3x3* A, const skcms_Matrix3x3* B) {
1530     skcms_Matrix3x3 m = { { { 0,0,0 },{ 0,0,0 },{ 0,0,0 } } };
1531     for (int r = 0; r < 3; r++)
1532         for (int c = 0; c < 3; c++) {
1533             m.vals[r][c] = A->vals[r][0] * B->vals[0][c]
1534                          + A->vals[r][1] * B->vals[1][c]
1535                          + A->vals[r][2] * B->vals[2][c];
1536         }
1537     return m;
1538 }
1539 
1540 #if defined(__clang__)
1541     [[clang::no_sanitize("float-divide-by-zero")]]  // Checked for by classify() on the way out.
1542 #endif
skcms_TransferFunction_invert(const skcms_TransferFunction * src,skcms_TransferFunction * dst)1543 bool skcms_TransferFunction_invert(const skcms_TransferFunction* src, skcms_TransferFunction* dst) {
1544     TF_PQish  pq;
1545     TF_HLGish hlg;
1546     switch (classify(*src, &pq, &hlg)) {
1547         case Bad: return false;
1548         case sRGBish: break;  // handled below
1549 
1550         case PQish:
1551             *dst = { TFKind_marker(PQish), -pq.A,  pq.D, 1.0f/pq.F
1552                                          ,  pq.B, -pq.E, 1.0f/pq.C};
1553             return true;
1554 
1555         case HLGish:
1556             *dst = { TFKind_marker(HLGinvish), 1.0f/hlg.R, 1.0f/hlg.G
1557                                              , 1.0f/hlg.a, hlg.b, hlg.c, 0 };
1558             return true;
1559 
1560         case HLGinvish:
1561             *dst = { TFKind_marker(HLGish), 1.0f/hlg.R, 1.0f/hlg.G
1562                                           , 1.0f/hlg.a, hlg.b, hlg.c, 0 };
1563             return true;
1564     }
1565 
1566     assert (classify(*src) == sRGBish);
1567 
1568     // We're inverting this function, solving for x in terms of y.
1569     //   y = (cx + f)         x < d
1570     //       (ax + b)^g + e   x ≥ d
1571     // The inverse of this function can be expressed in the same piecewise form.
1572     skcms_TransferFunction inv = {0,0,0,0,0,0,0};
1573 
1574     // We'll start by finding the new threshold inv.d.
1575     // In principle we should be able to find that by solving for y at x=d from either side.
1576     // (If those two d values aren't the same, it's a discontinuous transfer function.)
1577     float d_l =       src->c * src->d + src->f,
1578           d_r = powf_(src->a * src->d + src->b, src->g) + src->e;
1579     if (fabsf_(d_l - d_r) > 1/512.0f) {
1580         return false;
1581     }
1582     inv.d = d_l;  // TODO(mtklein): better in practice to choose d_r?
1583 
1584     // When d=0, the linear section collapses to a point.  We leave c,d,f all zero in that case.
1585     if (inv.d > 0) {
1586         // Inverting the linear section is pretty straightfoward:
1587         //        y       = cx + f
1588         //        y - f   = cx
1589         //   (1/c)y - f/c = x
1590         inv.c =    1.0f/src->c;
1591         inv.f = -src->f/src->c;
1592     }
1593 
1594     // The interesting part is inverting the nonlinear section:
1595     //         y                = (ax + b)^g + e.
1596     //         y - e            = (ax + b)^g
1597     //        (y - e)^1/g       =  ax + b
1598     //        (y - e)^1/g - b   =  ax
1599     //   (1/a)(y - e)^1/g - b/a =   x
1600     //
1601     // To make that fit our form, we need to move the (1/a) term inside the exponentiation:
1602     //   let k = (1/a)^g
1603     //   (1/a)( y -  e)^1/g - b/a = x
1604     //        (ky - ke)^1/g - b/a = x
1605 
1606     float k = powf_(src->a, -src->g);  // (1/a)^g == a^-g
1607     inv.g = 1.0f / src->g;
1608     inv.a = k;
1609     inv.b = -k * src->e;
1610     inv.e = -src->b / src->a;
1611 
1612     // We need to enforce the same constraints here that we do when fitting a curve,
1613     // a >= 0 and ad+b >= 0.  These constraints are checked by classify(), so they're true
1614     // of the source function if we're here.
1615 
1616     // Just like when fitting the curve, there's really no way to rescue a < 0.
1617     if (inv.a < 0) {
1618         return false;
1619     }
1620     // On the other hand we can rescue an ad+b that's gone slightly negative here.
1621     if (inv.a * inv.d + inv.b < 0) {
1622         inv.b = -inv.a * inv.d;
1623     }
1624 
1625     // That should usually make classify(inv) == sRGBish true, but there are a couple situations
1626     // where we might still fail here, like non-finite parameter values.
1627     if (classify(inv) != sRGBish) {
1628         return false;
1629     }
1630 
1631     assert (inv.a >= 0);
1632     assert (inv.a * inv.d + inv.b >= 0);
1633 
1634     // Now in principle we're done.
1635     // But to preserve the valuable invariant inv(src(1.0f)) == 1.0f, we'll tweak
1636     // e or f of the inverse, depending on which segment contains src(1.0f).
1637     float s = skcms_TransferFunction_eval(src, 1.0f);
1638     if (!isfinitef_(s)) {
1639         return false;
1640     }
1641 
1642     float sign = s < 0 ? -1.0f : 1.0f;
1643     s *= sign;
1644     if (s < inv.d) {
1645         inv.f = 1.0f - sign * inv.c * s;
1646     } else {
1647         inv.e = 1.0f - sign * powf_(inv.a * s + inv.b, inv.g);
1648     }
1649 
1650     *dst = inv;
1651     return classify(*dst) == sRGBish;
1652 }
1653 
1654 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ //
1655 
1656 // From here below we're approximating an skcms_Curve with an skcms_TransferFunction{g,a,b,c,d,e,f}:
1657 //
1658 //   tf(x) =  cx + f          x < d
1659 //   tf(x) = (ax + b)^g + e   x ≥ d
1660 //
1661 // When fitting, we add the additional constraint that both pieces meet at d:
1662 //
1663 //   cd + f = (ad + b)^g + e
1664 //
1665 // Solving for e and folding it through gives an alternate formulation of the non-linear piece:
1666 //
1667 //   tf(x) =                           cx + f   x < d
1668 //   tf(x) = (ax + b)^g - (ad + b)^g + cd + f   x ≥ d
1669 //
1670 // Our overall strategy is then:
1671 //    For a couple tolerances,
1672 //       - fit_linear():    fit c,d,f iteratively to as many points as our tolerance allows
1673 //       - invert c,d,f
1674 //       - fit_nonlinear(): fit g,a,b using Gauss-Newton given those inverted c,d,f
1675 //                          (and by constraint, inverted e) to the inverse of the table.
1676 //    Return the parameters with least maximum error.
1677 //
1678 // To run Gauss-Newton to find g,a,b, we'll also need the gradient of the residuals
1679 // of round-trip f_inv(x), the inverse of the non-linear piece of f(x).
1680 //
1681 //    let y = Table(x)
1682 //    r(x) = x - f_inv(y)
1683 //
1684 //    ∂r/∂g = ln(ay + b)*(ay + b)^g
1685 //          - ln(ad + b)*(ad + b)^g
1686 //    ∂r/∂a = yg(ay + b)^(g-1)
1687 //          - dg(ad + b)^(g-1)
1688 //    ∂r/∂b =  g(ay + b)^(g-1)
1689 //          -  g(ad + b)^(g-1)
1690 
1691 // Return the residual of roundtripping skcms_Curve(x) through f_inv(y) with parameters P,
1692 // and fill out the gradient of the residual into dfdP.
rg_nonlinear(float x,const skcms_Curve * curve,const skcms_TransferFunction * tf,float dfdP[3])1693 static float rg_nonlinear(float x,
1694                           const skcms_Curve* curve,
1695                           const skcms_TransferFunction* tf,
1696                           float dfdP[3]) {
1697     const float y = eval_curve(curve, x);
1698 
1699     const float g = tf->g, a = tf->a, b = tf->b,
1700                 c = tf->c, d = tf->d, f = tf->f;
1701 
1702     const float Y = fmaxf_(a*y + b, 0.0f),
1703                 D =        a*d + b;
1704     assert (D >= 0);
1705 
1706     // The gradient.
1707     dfdP[0] = logf_(Y)*powf_(Y, g)
1708             - logf_(D)*powf_(D, g);
1709     dfdP[1] = y*g*powf_(Y, g-1)
1710             - d*g*powf_(D, g-1);
1711     dfdP[2] =   g*powf_(Y, g-1)
1712             -   g*powf_(D, g-1);
1713 
1714     // The residual.
1715     const float f_inv = powf_(Y, g)
1716                       - powf_(D, g)
1717                       + c*d + f;
1718     return x - f_inv;
1719 }
1720 
gauss_newton_step(const skcms_Curve * curve,skcms_TransferFunction * tf,float x0,float dx,int N)1721 static bool gauss_newton_step(const skcms_Curve* curve,
1722                                     skcms_TransferFunction* tf,
1723                               float x0, float dx, int N) {
1724     // We'll sample x from the range [x0,x1] (both inclusive) N times with even spacing.
1725     //
1726     // Let P = [ tf->g, tf->a, tf->b ] (the three terms that we're adjusting).
1727     //
1728     // We want to do P' = P + (Jf^T Jf)^-1 Jf^T r(P),
1729     //   where r(P) is the residual vector
1730     //   and Jf is the Jacobian matrix of f(), ∂r/∂P.
1731     //
1732     // Let's review the shape of each of these expressions:
1733     //   r(P)   is [N x 1], a column vector with one entry per value of x tested
1734     //   Jf     is [N x 3], a matrix with an entry for each (x,P) pair
1735     //   Jf^T   is [3 x N], the transpose of Jf
1736     //
1737     //   Jf^T Jf   is [3 x N] * [N x 3] == [3 x 3], a 3x3 matrix,
1738     //                                              and so is its inverse (Jf^T Jf)^-1
1739     //   Jf^T r(P) is [3 x N] * [N x 1] == [3 x 1], a column vector with the same shape as P
1740     //
1741     // Our implementation strategy to get to the final ∆P is
1742     //   1) evaluate Jf^T Jf,   call that lhs
1743     //   2) evaluate Jf^T r(P), call that rhs
1744     //   3) invert lhs
1745     //   4) multiply inverse lhs by rhs
1746     //
1747     // This is a friendly implementation strategy because we don't have to have any
1748     // buffers that scale with N, and equally nice don't have to perform any matrix
1749     // operations that are variable size.
1750     //
1751     // Other implementation strategies could trade this off, e.g. evaluating the
1752     // pseudoinverse of Jf ( (Jf^T Jf)^-1 Jf^T ) directly, then multiplying that by
1753     // the residuals.  That would probably require implementing singular value
1754     // decomposition, and would create a [3 x N] matrix to be multiplied by the
1755     // [N x 1] residual vector, but on the upside I think that'd eliminate the
1756     // possibility of this gauss_newton_step() function ever failing.
1757 
1758     // 0) start off with lhs and rhs safely zeroed.
1759     skcms_Matrix3x3 lhs = {{ {0,0,0}, {0,0,0}, {0,0,0} }};
1760     skcms_Vector3   rhs = {  {0,0,0} };
1761 
1762     // 1,2) evaluate lhs and evaluate rhs
1763     //   We want to evaluate Jf only once, but both lhs and rhs involve Jf^T,
1764     //   so we'll have to update lhs and rhs at the same time.
1765     for (int i = 0; i < N; i++) {
1766         float x = x0 + i*dx;
1767 
1768         float dfdP[3] = {0,0,0};
1769         float resid = rg_nonlinear(x,curve,tf, dfdP);
1770 
1771         for (int r = 0; r < 3; r++) {
1772             for (int c = 0; c < 3; c++) {
1773                 lhs.vals[r][c] += dfdP[r] * dfdP[c];
1774             }
1775             rhs.vals[r] += dfdP[r] * resid;
1776         }
1777     }
1778 
1779     // If any of the 3 P parameters are unused, this matrix will be singular.
1780     // Detect those cases and fix them up to indentity instead, so we can invert.
1781     for (int k = 0; k < 3; k++) {
1782         if (lhs.vals[0][k]==0 && lhs.vals[1][k]==0 && lhs.vals[2][k]==0 &&
1783             lhs.vals[k][0]==0 && lhs.vals[k][1]==0 && lhs.vals[k][2]==0) {
1784             lhs.vals[k][k] = 1;
1785         }
1786     }
1787 
1788     // 3) invert lhs
1789     skcms_Matrix3x3 lhs_inv;
1790     if (!skcms_Matrix3x3_invert(&lhs, &lhs_inv)) {
1791         return false;
1792     }
1793 
1794     // 4) multiply inverse lhs by rhs
1795     skcms_Vector3 dP = mv_mul(&lhs_inv, &rhs);
1796     tf->g += dP.vals[0];
1797     tf->a += dP.vals[1];
1798     tf->b += dP.vals[2];
1799     return isfinitef_(tf->g) && isfinitef_(tf->a) && isfinitef_(tf->b);
1800 }
1801 
max_roundtrip_error_checked(const skcms_Curve * curve,const skcms_TransferFunction * tf_inv)1802 static float max_roundtrip_error_checked(const skcms_Curve* curve,
1803                                          const skcms_TransferFunction* tf_inv) {
1804     skcms_TransferFunction tf;
1805     if (!skcms_TransferFunction_invert(tf_inv, &tf) || sRGBish != classify(tf)) {
1806         return INFINITY_;
1807     }
1808 
1809     skcms_TransferFunction tf_inv_again;
1810     if (!skcms_TransferFunction_invert(&tf, &tf_inv_again)) {
1811         return INFINITY_;
1812     }
1813 
1814     return skcms_MaxRoundtripError(curve, &tf_inv_again);
1815 }
1816 
1817 // Fit the points in [L,N) to the non-linear piece of tf, or return false if we can't.
fit_nonlinear(const skcms_Curve * curve,int L,int N,skcms_TransferFunction * tf)1818 static bool fit_nonlinear(const skcms_Curve* curve, int L, int N, skcms_TransferFunction* tf) {
1819     // This enforces a few constraints that are not modeled in gauss_newton_step()'s optimization.
1820     auto fixup_tf = [tf]() {
1821         // a must be non-negative. That ensures the function is monotonically increasing.
1822         // We don't really know how to fix up a if it goes negative.
1823         if (tf->a < 0) {
1824             return false;
1825         }
1826         // ad+b must be non-negative. That ensures we don't end up with complex numbers in powf.
1827         // We feel just barely not uneasy enough to tweak b so ad+b is zero in this case.
1828         if (tf->a * tf->d + tf->b < 0) {
1829             tf->b = -tf->a * tf->d;
1830         }
1831         assert (tf->a >= 0 &&
1832                 tf->a * tf->d + tf->b >= 0);
1833 
1834         // cd+f must be ~= (ad+b)^g+e. That ensures the function is continuous. We keep e as a free
1835         // parameter so we can guarantee this.
1836         tf->e =   tf->c*tf->d + tf->f
1837           - powf_(tf->a*tf->d + tf->b, tf->g);
1838 
1839         return true;
1840     };
1841 
1842     if (!fixup_tf()) {
1843         return false;
1844     }
1845 
1846     // No matter where we start, dx should always represent N even steps from 0 to 1.
1847     const float dx = 1.0f / (N-1);
1848 
1849     skcms_TransferFunction best_tf = *tf;
1850     float best_max_error = INFINITY_;
1851 
1852     // Need this or several curves get worse... *sigh*
1853     float init_error = max_roundtrip_error_checked(curve, tf);
1854     if (init_error < best_max_error) {
1855         best_max_error = init_error;
1856         best_tf = *tf;
1857     }
1858 
1859     // As far as we can tell, 1 Gauss-Newton step won't converge, and 3 steps is no better than 2.
1860     for (int j = 0; j < 8; j++) {
1861         if (!gauss_newton_step(curve, tf, L*dx, dx, N-L) || !fixup_tf()) {
1862             *tf = best_tf;
1863             return isfinitef_(best_max_error);
1864         }
1865 
1866         float max_error = max_roundtrip_error_checked(curve, tf);
1867         if (max_error < best_max_error) {
1868             best_max_error = max_error;
1869             best_tf = *tf;
1870         }
1871     }
1872 
1873     *tf = best_tf;
1874     return isfinitef_(best_max_error);
1875 }
1876 
skcms_ApproximateCurve(const skcms_Curve * curve,skcms_TransferFunction * approx,float * max_error)1877 bool skcms_ApproximateCurve(const skcms_Curve* curve,
1878                             skcms_TransferFunction* approx,
1879                             float* max_error) {
1880     if (!curve || !approx || !max_error) {
1881         return false;
1882     }
1883 
1884     if (curve->table_entries == 0) {
1885         // No point approximating an skcms_TransferFunction with an skcms_TransferFunction!
1886         return false;
1887     }
1888 
1889     if (curve->table_entries == 1 || curve->table_entries > (uint32_t)INT_MAX) {
1890         // We need at least two points, and must put some reasonable cap on the maximum number.
1891         return false;
1892     }
1893 
1894     int N = (int)curve->table_entries;
1895     const float dx = 1.0f / (N - 1);
1896 
1897     *max_error = INFINITY_;
1898     const float kTolerances[] = { 1.5f / 65535.0f, 1.0f / 512.0f };
1899     for (int t = 0; t < ARRAY_COUNT(kTolerances); t++) {
1900         skcms_TransferFunction tf,
1901                                tf_inv;
1902 
1903         // It's problematic to fit curves with non-zero f, so always force it to zero explicitly.
1904         tf.f = 0.0f;
1905         int L = fit_linear(curve, N, kTolerances[t], &tf.c, &tf.d);
1906 
1907         if (L == N) {
1908             // If the entire data set was linear, move the coefficients to the nonlinear portion
1909             // with G == 1.  This lets use a canonical representation with d == 0.
1910             tf.g = 1;
1911             tf.a = tf.c;
1912             tf.b = tf.f;
1913             tf.c = tf.d = tf.e = tf.f = 0;
1914         } else if (L == N - 1) {
1915             // Degenerate case with only two points in the nonlinear segment. Solve directly.
1916             tf.g = 1;
1917             tf.a = (eval_curve(curve, (N-1)*dx) -
1918                     eval_curve(curve, (N-2)*dx))
1919                  / dx;
1920             tf.b = eval_curve(curve, (N-2)*dx)
1921                  - tf.a * (N-2)*dx;
1922             tf.e = 0;
1923         } else {
1924             // Start by guessing a gamma-only curve through the midpoint.
1925             int mid = (L + N) / 2;
1926             float mid_x = mid / (N - 1.0f);
1927             float mid_y = eval_curve(curve, mid_x);
1928             tf.g = log2f_(mid_y) / log2f_(mid_x);
1929             tf.a = 1;
1930             tf.b = 0;
1931             tf.e =    tf.c*tf.d + tf.f
1932               - powf_(tf.a*tf.d + tf.b, tf.g);
1933 
1934 
1935             if (!skcms_TransferFunction_invert(&tf, &tf_inv) ||
1936                 !fit_nonlinear(curve, L,N, &tf_inv)) {
1937                 continue;
1938             }
1939 
1940             // We fit tf_inv, so calculate tf to keep in sync.
1941             // fit_nonlinear() should guarantee invertibility.
1942             if (!skcms_TransferFunction_invert(&tf_inv, &tf)) {
1943                 assert(false);
1944                 continue;
1945             }
1946         }
1947 
1948         // We'd better have a sane, sRGB-ish TF by now.
1949         // Other non-Bad TFs would be fine, but we know we've only ever tried to fit sRGBish;
1950         // anything else is just some accident of math and the way we pun tf.g as a type flag.
1951         // fit_nonlinear() should guarantee this, but the special cases may fail this test.
1952         if (sRGBish != classify(tf)) {
1953             continue;
1954         }
1955 
1956         // We find our error by roundtripping the table through tf_inv.
1957         //
1958         // (The most likely use case for this approximation is to be inverted and
1959         // used as the transfer function for a destination color space.)
1960         //
1961         // We've kept tf and tf_inv in sync above, but we can't guarantee that tf is
1962         // invertible, so re-verify that here (and use the new inverse for testing).
1963         // fit_nonlinear() should guarantee this, but the special cases that don't use
1964         // it may fail this test.
1965         if (!skcms_TransferFunction_invert(&tf, &tf_inv)) {
1966             continue;
1967         }
1968 
1969         float err = skcms_MaxRoundtripError(curve, &tf_inv);
1970         if (*max_error > err) {
1971             *max_error = err;
1972             *approx    = tf;
1973         }
1974     }
1975     return isfinitef_(*max_error);
1976 }
1977 
1978 // ~~~~ Impl. of skcms_Transform() ~~~~
1979 
1980 typedef enum {
1981     Op_load_a8,
1982     Op_load_g8,
1983     Op_load_8888_palette8,
1984     Op_load_4444,
1985     Op_load_565,
1986     Op_load_888,
1987     Op_load_8888,
1988     Op_load_1010102,
1989     Op_load_161616LE,
1990     Op_load_16161616LE,
1991     Op_load_161616BE,
1992     Op_load_16161616BE,
1993     Op_load_hhh,
1994     Op_load_hhhh,
1995     Op_load_fff,
1996     Op_load_ffff,
1997 
1998     Op_swap_rb,
1999     Op_clamp,
2000     Op_invert,
2001     Op_force_opaque,
2002     Op_premul,
2003     Op_unpremul,
2004     Op_matrix_3x3,
2005     Op_matrix_3x4,
2006     Op_lab_to_xyz,
2007 
2008     Op_tf_r,
2009     Op_tf_g,
2010     Op_tf_b,
2011     Op_tf_a,
2012 
2013     Op_pq_r,
2014     Op_pq_g,
2015     Op_pq_b,
2016     Op_pq_a,
2017 
2018     Op_hlg_r,
2019     Op_hlg_g,
2020     Op_hlg_b,
2021     Op_hlg_a,
2022 
2023     Op_hlginv_r,
2024     Op_hlginv_g,
2025     Op_hlginv_b,
2026     Op_hlginv_a,
2027 
2028     Op_table_r,
2029     Op_table_g,
2030     Op_table_b,
2031     Op_table_a,
2032 
2033     Op_clut,
2034 
2035     Op_store_a8,
2036     Op_store_g8,
2037     Op_store_4444,
2038     Op_store_565,
2039     Op_store_888,
2040     Op_store_8888,
2041     Op_store_1010102,
2042     Op_store_161616LE,
2043     Op_store_16161616LE,
2044     Op_store_161616BE,
2045     Op_store_16161616BE,
2046     Op_store_hhh,
2047     Op_store_hhhh,
2048     Op_store_fff,
2049     Op_store_ffff,
2050 } Op;
2051 
2052 #if defined(__clang__)
2053     template <int N, typename T> using Vec = T __attribute__((ext_vector_type(N)));
2054 #elif defined(__GNUC__)
2055     // For some reason GCC accepts this nonsense, but not the more straightforward version,
2056     //   template <int N, typename T> using Vec = T __attribute__((vector_size(N*sizeof(T))));
2057     template <int N, typename T>
2058     struct VecHelper { typedef T __attribute__((vector_size(N*sizeof(T)))) V; };
2059 
2060     template <int N, typename T> using Vec = typename VecHelper<N,T>::V;
2061 #endif
2062 
2063 // First, instantiate our default exec_ops() implementation using the default compiliation target.
2064 
2065 namespace baseline {
2066 #if defined(SKCMS_PORTABLE) || !(defined(__clang__) || defined(__GNUC__)) \
2067                             || (defined(__EMSCRIPTEN_major__) && !defined(__wasm_simd128__))
2068     #define N 1
2069     template <typename T> using V = T;
2070     using Color = float;
2071 #elif defined(__AVX512F__)
2072     #define N 16
2073     template <typename T> using V = Vec<N,T>;
2074     using Color = float;
2075 #elif defined(__AVX__)
2076     #define N 8
2077     template <typename T> using V = Vec<N,T>;
2078     using Color = float;
2079 #elif defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(SKCMS_OPT_INTO_NEON_FP16)
2080     #define N 8
2081     template <typename T> using V = Vec<N,T>;
2082     using Color = _Float16;
2083 #else
2084     #define N 4
2085     template <typename T> using V = Vec<N,T>;
2086     using Color = float;
2087 #endif
2088 
2089     #include "src/Transform_inl.h"
2090     #undef N
2091 }
2092 
2093 // Now, instantiate any other versions of run_program() we may want for runtime detection.
2094 #if !defined(SKCMS_PORTABLE) &&                           \
2095     !defined(SKCMS_NO_RUNTIME_CPU_DETECTION) &&           \
2096         (( defined(__clang__) && __clang_major__ >= 5) || \
2097          (!defined(__clang__) && defined(__GNUC__)))      \
2098      && defined(__x86_64__)
2099 
2100     #if !defined(__AVX2__)
2101         #if defined(__clang__)
2102             #pragma clang attribute push(__attribute__((target("avx2,f16c"))), apply_to=function)
2103         #elif defined(__GNUC__)
2104             #pragma GCC push_options
2105             #pragma GCC target("avx2,f16c")
2106         #endif
2107 
2108         namespace hsw {
2109             #define USING_AVX
2110             #define USING_AVX_F16C
2111             #define USING_AVX2
2112             #define N 8
2113             template <typename T> using V = Vec<N,T>;
2114             using Color = float;
2115 
2116             #include "src/Transform_inl.h"
2117 
2118             // src/Transform_inl.h will undefine USING_* for us.
2119             #undef N
2120         }
2121 
2122         #if defined(__clang__)
2123             #pragma clang attribute pop
2124         #elif defined(__GNUC__)
2125             #pragma GCC pop_options
2126         #endif
2127 
2128         #define TEST_FOR_HSW
2129     #endif
2130 
2131     #if !defined(__AVX512F__)
2132         #if defined(__clang__)
2133             #pragma clang attribute push(__attribute__((target("avx512f,avx512dq,avx512cd,avx512bw,avx512vl"))), apply_to=function)
2134         #elif defined(__GNUC__)
2135             #pragma GCC push_options
2136             #pragma GCC target("avx512f,avx512dq,avx512cd,avx512bw,avx512vl")
2137         #endif
2138 
2139         namespace skx {
2140             #define USING_AVX512F
2141             #define N 16
2142             template <typename T> using V = Vec<N,T>;
2143             using Color = float;
2144 
2145             #include "src/Transform_inl.h"
2146 
2147             // src/Transform_inl.h will undefine USING_* for us.
2148             #undef N
2149         }
2150 
2151         #if defined(__clang__)
2152             #pragma clang attribute pop
2153         #elif defined(__GNUC__)
2154             #pragma GCC pop_options
2155         #endif
2156 
2157         #define TEST_FOR_SKX
2158     #endif
2159 
2160     #if defined(TEST_FOR_HSW) || defined(TEST_FOR_SKX)
2161         enum class CpuType { None, HSW, SKX };
cpu_type()2162         static CpuType cpu_type() {
2163             static const CpuType type = []{
2164                 if (!runtime_cpu_detection) {
2165                     return CpuType::None;
2166                 }
2167                 // See http://www.sandpile.org/x86/cpuid.htm
2168 
2169                 // First, a basic cpuid(1) lets us check prerequisites for HSW, SKX.
2170                 uint32_t eax, ebx, ecx, edx;
2171                 __asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
2172                                              : "0"(1), "2"(0));
2173                 if ((edx & (1u<<25)) &&  // SSE
2174                     (edx & (1u<<26)) &&  // SSE2
2175                     (ecx & (1u<< 0)) &&  // SSE3
2176                     (ecx & (1u<< 9)) &&  // SSSE3
2177                     (ecx & (1u<<12)) &&  // FMA (N.B. not used, avoided even)
2178                     (ecx & (1u<<19)) &&  // SSE4.1
2179                     (ecx & (1u<<20)) &&  // SSE4.2
2180                     (ecx & (1u<<26)) &&  // XSAVE
2181                     (ecx & (1u<<27)) &&  // OSXSAVE
2182                     (ecx & (1u<<28)) &&  // AVX
2183                     (ecx & (1u<<29))) {  // F16C
2184 
2185                     // Call cpuid(7) to check for AVX2 and AVX-512 bits.
2186                     __asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
2187                                                  : "0"(7), "2"(0));
2188                     // eax from xgetbv(0) will tell us whether XMM, YMM, and ZMM state is saved.
2189                     uint32_t xcr0, dont_need_edx;
2190                     __asm__ __volatile__("xgetbv" : "=a"(xcr0), "=d"(dont_need_edx) : "c"(0));
2191 
2192                     if ((xcr0 & (1u<<1)) &&  // XMM register state saved?
2193                         (xcr0 & (1u<<2)) &&  // YMM register state saved?
2194                         (ebx  & (1u<<5))) {  // AVX2
2195                         // At this point we're at least HSW.  Continue checking for SKX.
2196                         if ((xcr0 & (1u<< 5)) && // Opmasks state saved?
2197                             (xcr0 & (1u<< 6)) && // First 16 ZMM registers saved?
2198                             (xcr0 & (1u<< 7)) && // High 16 ZMM registers saved?
2199                             (ebx  & (1u<<16)) && // AVX512F
2200                             (ebx  & (1u<<17)) && // AVX512DQ
2201                             (ebx  & (1u<<28)) && // AVX512CD
2202                             (ebx  & (1u<<30)) && // AVX512BW
2203                             (ebx  & (1u<<31))) { // AVX512VL
2204                             return CpuType::SKX;
2205                         }
2206                         return CpuType::HSW;
2207                     }
2208                 }
2209                 return CpuType::None;
2210             }();
2211             return type;
2212         }
2213     #endif
2214 
2215 #endif
2216 
2217 typedef struct {
2218     Op          op;
2219     const void* arg;
2220 } OpAndArg;
2221 
select_curve_op(const skcms_Curve * curve,int channel)2222 static OpAndArg select_curve_op(const skcms_Curve* curve, int channel) {
2223     static const struct { Op sRGBish, PQish, HLGish, HLGinvish, table; } ops[] = {
2224         { Op_tf_r, Op_pq_r, Op_hlg_r, Op_hlginv_r, Op_table_r },
2225         { Op_tf_g, Op_pq_g, Op_hlg_g, Op_hlginv_g, Op_table_g },
2226         { Op_tf_b, Op_pq_b, Op_hlg_b, Op_hlginv_b, Op_table_b },
2227         { Op_tf_a, Op_pq_a, Op_hlg_a, Op_hlginv_a, Op_table_a },
2228     };
2229     const auto& op = ops[channel];
2230 
2231     if (curve->table_entries == 0) {
2232         const OpAndArg noop = { Op_load_a8/*doesn't matter*/, nullptr };
2233 
2234         const skcms_TransferFunction& tf = curve->parametric;
2235 
2236         if (tf.g == 1 && tf.a == 1 &&
2237             tf.b == 0 && tf.c == 0 && tf.d == 0 && tf.e == 0 && tf.f == 0) {
2238             return noop;
2239         }
2240 
2241         switch (classify(tf)) {
2242             case Bad:        return noop;
2243             case sRGBish:    return OpAndArg{op.sRGBish,   &tf};
2244             case PQish:      return OpAndArg{op.PQish,     &tf};
2245             case HLGish:     return OpAndArg{op.HLGish,    &tf};
2246             case HLGinvish:  return OpAndArg{op.HLGinvish, &tf};
2247         }
2248     }
2249     return OpAndArg{op.table, curve};
2250 }
2251 
bytes_per_pixel(skcms_PixelFormat fmt)2252 static size_t bytes_per_pixel(skcms_PixelFormat fmt) {
2253     switch (fmt >> 1) {   // ignore rgb/bgr
2254         case skcms_PixelFormat_A_8                >> 1: return  1;
2255         case skcms_PixelFormat_G_8                >> 1: return  1;
2256         case skcms_PixelFormat_RGBA_8888_Palette8 >> 1: return  1;
2257         case skcms_PixelFormat_ABGR_4444          >> 1: return  2;
2258         case skcms_PixelFormat_RGB_565            >> 1: return  2;
2259         case skcms_PixelFormat_RGB_888            >> 1: return  3;
2260         case skcms_PixelFormat_RGBA_8888          >> 1: return  4;
2261         case skcms_PixelFormat_RGBA_8888_sRGB     >> 1: return  4;
2262         case skcms_PixelFormat_RGBA_1010102       >> 1: return  4;
2263         case skcms_PixelFormat_RGB_161616LE       >> 1: return  6;
2264         case skcms_PixelFormat_RGBA_16161616LE    >> 1: return  8;
2265         case skcms_PixelFormat_RGB_161616BE       >> 1: return  6;
2266         case skcms_PixelFormat_RGBA_16161616BE    >> 1: return  8;
2267         case skcms_PixelFormat_RGB_hhh_Norm       >> 1: return  6;
2268         case skcms_PixelFormat_RGBA_hhhh_Norm     >> 1: return  8;
2269         case skcms_PixelFormat_RGB_hhh            >> 1: return  6;
2270         case skcms_PixelFormat_RGBA_hhhh          >> 1: return  8;
2271         case skcms_PixelFormat_RGB_fff            >> 1: return 12;
2272         case skcms_PixelFormat_RGBA_ffff          >> 1: return 16;
2273     }
2274     assert(false);
2275     return 0;
2276 }
2277 
prep_for_destination(const skcms_ICCProfile * profile,skcms_Matrix3x3 * fromXYZD50,skcms_TransferFunction * invR,skcms_TransferFunction * invG,skcms_TransferFunction * invB)2278 static bool prep_for_destination(const skcms_ICCProfile* profile,
2279                                  skcms_Matrix3x3* fromXYZD50,
2280                                  skcms_TransferFunction* invR,
2281                                  skcms_TransferFunction* invG,
2282                                  skcms_TransferFunction* invB) {
2283     // We only support destinations with parametric transfer functions
2284     // and with gamuts that can be transformed from XYZD50.
2285     return profile->has_trc
2286         && profile->has_toXYZD50
2287         && profile->trc[0].table_entries == 0
2288         && profile->trc[1].table_entries == 0
2289         && profile->trc[2].table_entries == 0
2290         && skcms_TransferFunction_invert(&profile->trc[0].parametric, invR)
2291         && skcms_TransferFunction_invert(&profile->trc[1].parametric, invG)
2292         && skcms_TransferFunction_invert(&profile->trc[2].parametric, invB)
2293         && skcms_Matrix3x3_invert(&profile->toXYZD50, fromXYZD50);
2294 }
2295 
skcms_Transform(const void * src,skcms_PixelFormat srcFmt,skcms_AlphaFormat srcAlpha,const skcms_ICCProfile * srcProfile,void * dst,skcms_PixelFormat dstFmt,skcms_AlphaFormat dstAlpha,const skcms_ICCProfile * dstProfile,size_t npixels)2296 bool skcms_Transform(const void*             src,
2297                      skcms_PixelFormat       srcFmt,
2298                      skcms_AlphaFormat       srcAlpha,
2299                      const skcms_ICCProfile* srcProfile,
2300                      void*                   dst,
2301                      skcms_PixelFormat       dstFmt,
2302                      skcms_AlphaFormat       dstAlpha,
2303                      const skcms_ICCProfile* dstProfile,
2304                      size_t                  npixels) {
2305     return skcms_TransformWithPalette(src, srcFmt, srcAlpha, srcProfile,
2306                                       dst, dstFmt, dstAlpha, dstProfile,
2307                                       npixels, nullptr);
2308 }
2309 
skcms_TransformWithPalette(const void * src,skcms_PixelFormat srcFmt,skcms_AlphaFormat srcAlpha,const skcms_ICCProfile * srcProfile,void * dst,skcms_PixelFormat dstFmt,skcms_AlphaFormat dstAlpha,const skcms_ICCProfile * dstProfile,size_t nz,const void * palette)2310 bool skcms_TransformWithPalette(const void*             src,
2311                                 skcms_PixelFormat       srcFmt,
2312                                 skcms_AlphaFormat       srcAlpha,
2313                                 const skcms_ICCProfile* srcProfile,
2314                                 void*                   dst,
2315                                 skcms_PixelFormat       dstFmt,
2316                                 skcms_AlphaFormat       dstAlpha,
2317                                 const skcms_ICCProfile* dstProfile,
2318                                 size_t                  nz,
2319                                 const void*             palette) {
2320     const size_t dst_bpp = bytes_per_pixel(dstFmt),
2321                  src_bpp = bytes_per_pixel(srcFmt);
2322     // Let's just refuse if the request is absurdly big.
2323     if (nz * dst_bpp > INT_MAX || nz * src_bpp > INT_MAX) {
2324         return false;
2325     }
2326     int n = (int)nz;
2327 
2328     // Null profiles default to sRGB. Passing null for both is handy when doing format conversion.
2329     if (!srcProfile) {
2330         srcProfile = skcms_sRGB_profile();
2331     }
2332     if (!dstProfile) {
2333         dstProfile = skcms_sRGB_profile();
2334     }
2335 
2336     // We can't transform in place unless the PixelFormats are the same size.
2337     if (dst == src && dst_bpp != src_bpp) {
2338         return false;
2339     }
2340     // TODO: more careful alias rejection (like, dst == src + 1)?
2341 
2342     if (needs_palette(srcFmt) && !palette) {
2343         return false;
2344     }
2345 
2346     Op          program  [32];
2347     const void* arguments[32];
2348 
2349     Op*          ops  = program;
2350     const void** args = arguments;
2351 
2352     // These are always parametric curves of some sort.
2353     skcms_Curve dst_curves[3];
2354     dst_curves[0].table_entries =
2355     dst_curves[1].table_entries =
2356     dst_curves[2].table_entries = 0;
2357 
2358     skcms_Matrix3x3        from_xyz;
2359 
2360     switch (srcFmt >> 1) {
2361         default: return false;
2362         case skcms_PixelFormat_A_8             >> 1: *ops++ = Op_load_a8;         break;
2363         case skcms_PixelFormat_G_8             >> 1: *ops++ = Op_load_g8;         break;
2364         case skcms_PixelFormat_ABGR_4444       >> 1: *ops++ = Op_load_4444;       break;
2365         case skcms_PixelFormat_RGB_565         >> 1: *ops++ = Op_load_565;        break;
2366         case skcms_PixelFormat_RGB_888         >> 1: *ops++ = Op_load_888;        break;
2367         case skcms_PixelFormat_RGBA_8888       >> 1: *ops++ = Op_load_8888;       break;
2368         case skcms_PixelFormat_RGBA_1010102    >> 1: *ops++ = Op_load_1010102;    break;
2369         case skcms_PixelFormat_RGB_161616LE    >> 1: *ops++ = Op_load_161616LE;   break;
2370         case skcms_PixelFormat_RGBA_16161616LE >> 1: *ops++ = Op_load_16161616LE; break;
2371         case skcms_PixelFormat_RGB_161616BE    >> 1: *ops++ = Op_load_161616BE;   break;
2372         case skcms_PixelFormat_RGBA_16161616BE >> 1: *ops++ = Op_load_16161616BE; break;
2373         case skcms_PixelFormat_RGB_hhh_Norm    >> 1: *ops++ = Op_load_hhh;        break;
2374         case skcms_PixelFormat_RGBA_hhhh_Norm  >> 1: *ops++ = Op_load_hhhh;       break;
2375         case skcms_PixelFormat_RGB_hhh         >> 1: *ops++ = Op_load_hhh;        break;
2376         case skcms_PixelFormat_RGBA_hhhh       >> 1: *ops++ = Op_load_hhhh;       break;
2377         case skcms_PixelFormat_RGB_fff         >> 1: *ops++ = Op_load_fff;        break;
2378         case skcms_PixelFormat_RGBA_ffff       >> 1: *ops++ = Op_load_ffff;       break;
2379 
2380         case skcms_PixelFormat_RGBA_8888_Palette8 >> 1: *ops++  = Op_load_8888_palette8;
2381                                                         *args++ = palette;
2382                                                         break;
2383         case skcms_PixelFormat_RGBA_8888_sRGB >> 1:
2384             *ops++ = Op_load_8888;
2385             *ops++ = Op_tf_r;       *args++ = skcms_sRGB_TransferFunction();
2386             *ops++ = Op_tf_g;       *args++ = skcms_sRGB_TransferFunction();
2387             *ops++ = Op_tf_b;       *args++ = skcms_sRGB_TransferFunction();
2388             break;
2389     }
2390     if (srcFmt == skcms_PixelFormat_RGB_hhh_Norm ||
2391         srcFmt == skcms_PixelFormat_RGBA_hhhh_Norm) {
2392         *ops++ = Op_clamp;
2393     }
2394     if (srcFmt & 1) {
2395         *ops++ = Op_swap_rb;
2396     }
2397     skcms_ICCProfile gray_dst_profile;
2398     if ((dstFmt >> 1) == (skcms_PixelFormat_G_8 >> 1)) {
2399         // When transforming to gray, stop at XYZ (by setting toXYZ to identity), then transform
2400         // luminance (Y) by the destination transfer function.
2401         gray_dst_profile = *dstProfile;
2402         skcms_SetXYZD50(&gray_dst_profile, &skcms_XYZD50_profile()->toXYZD50);
2403         dstProfile = &gray_dst_profile;
2404     }
2405 
2406     if (srcProfile->data_color_space == skcms_Signature_CMYK) {
2407         // Photoshop creates CMYK images as inverse CMYK.
2408         // These happen to be the only ones we've _ever_ seen.
2409         *ops++ = Op_invert;
2410         // With CMYK, ignore the alpha type, to avoid changing K or conflating CMY with K.
2411         srcAlpha = skcms_AlphaFormat_Unpremul;
2412     }
2413 
2414     if (srcAlpha == skcms_AlphaFormat_Opaque) {
2415         *ops++ = Op_force_opaque;
2416     } else if (srcAlpha == skcms_AlphaFormat_PremulAsEncoded) {
2417         *ops++ = Op_unpremul;
2418     }
2419 
2420     if (dstProfile != srcProfile) {
2421 
2422         if (!prep_for_destination(dstProfile,
2423                                   &from_xyz,
2424                                   &dst_curves[0].parametric,
2425                                   &dst_curves[1].parametric,
2426                                   &dst_curves[2].parametric)) {
2427             return false;
2428         }
2429 
2430         if (srcProfile->has_A2B) {
2431             if (srcProfile->A2B.input_channels) {
2432                 for (int i = 0; i < (int)srcProfile->A2B.input_channels; i++) {
2433                     OpAndArg oa = select_curve_op(&srcProfile->A2B.input_curves[i], i);
2434                     if (oa.arg) {
2435                         *ops++  = oa.op;
2436                         *args++ = oa.arg;
2437                     }
2438                 }
2439                 *ops++ = Op_clamp;
2440                 *ops++ = Op_clut;
2441                 *args++ = &srcProfile->A2B;
2442             }
2443 
2444             if (srcProfile->A2B.matrix_channels == 3) {
2445                 for (int i = 0; i < 3; i++) {
2446                     OpAndArg oa = select_curve_op(&srcProfile->A2B.matrix_curves[i], i);
2447                     if (oa.arg) {
2448                         *ops++  = oa.op;
2449                         *args++ = oa.arg;
2450                     }
2451                 }
2452 
2453                 static const skcms_Matrix3x4 I = {{
2454                     {1,0,0,0},
2455                     {0,1,0,0},
2456                     {0,0,1,0},
2457                 }};
2458                 if (0 != memcmp(&I, &srcProfile->A2B.matrix, sizeof(I))) {
2459                     *ops++  = Op_matrix_3x4;
2460                     *args++ = &srcProfile->A2B.matrix;
2461                 }
2462             }
2463 
2464             if (srcProfile->A2B.output_channels == 3) {
2465                 for (int i = 0; i < 3; i++) {
2466                     OpAndArg oa = select_curve_op(&srcProfile->A2B.output_curves[i], i);
2467                     if (oa.arg) {
2468                         *ops++  = oa.op;
2469                         *args++ = oa.arg;
2470                     }
2471                 }
2472             }
2473 
2474             if (srcProfile->pcs == skcms_Signature_Lab) {
2475                 *ops++ = Op_lab_to_xyz;
2476             }
2477 
2478         } else if (srcProfile->has_trc && srcProfile->has_toXYZD50) {
2479             for (int i = 0; i < 3; i++) {
2480                 OpAndArg oa = select_curve_op(&srcProfile->trc[i], i);
2481                 if (oa.arg) {
2482                     *ops++  = oa.op;
2483                     *args++ = oa.arg;
2484                 }
2485             }
2486         } else {
2487             return false;
2488         }
2489 
2490         // A2B sources should already be in XYZD50 at this point.
2491         // Others still need to be transformed using their toXYZD50 matrix.
2492         // N.B. There are profiles that contain both A2B tags and toXYZD50 matrices.
2493         // If we use the A2B tags, we need to ignore the XYZD50 matrix entirely.
2494         assert (srcProfile->has_A2B || srcProfile->has_toXYZD50);
2495         static const skcms_Matrix3x3 I = {{
2496             { 1.0f, 0.0f, 0.0f },
2497             { 0.0f, 1.0f, 0.0f },
2498             { 0.0f, 0.0f, 1.0f },
2499         }};
2500         const skcms_Matrix3x3* to_xyz = srcProfile->has_A2B ? &I : &srcProfile->toXYZD50;
2501 
2502         // There's a chance the source and destination gamuts are identical,
2503         // in which case we can skip the gamut transform.
2504         if (0 != memcmp(&dstProfile->toXYZD50, to_xyz, sizeof(skcms_Matrix3x3))) {
2505             // Concat the entire gamut transform into from_xyz,
2506             // now slightly misnamed but it's a handy spot to stash the result.
2507             from_xyz = skcms_Matrix3x3_concat(&from_xyz, to_xyz);
2508             *ops++  = Op_matrix_3x3;
2509             *args++ = &from_xyz;
2510         }
2511 
2512         // Encode back to dst RGB using its parametric transfer functions.
2513         for (int i = 0; i < 3; i++) {
2514             OpAndArg oa = select_curve_op(dst_curves+i, i);
2515             if (oa.arg) {
2516                 assert (oa.op != Op_table_r &&
2517                         oa.op != Op_table_g &&
2518                         oa.op != Op_table_b &&
2519                         oa.op != Op_table_a);
2520                 *ops++  = oa.op;
2521                 *args++ = oa.arg;
2522             }
2523         }
2524     }
2525 
2526     // Clamp here before premul to make sure we're clamping to normalized values _and_ gamut,
2527     // not just to values that fit in [0,1].
2528     //
2529     // E.g. r = 1.1, a = 0.5 would fit fine in fixed point after premul (ra=0.55,a=0.5),
2530     // but would be carrying r > 1, which is really unexpected for downstream consumers.
2531     if (dstFmt < skcms_PixelFormat_RGB_hhh) {
2532         *ops++ = Op_clamp;
2533     }
2534     if (dstAlpha == skcms_AlphaFormat_Opaque) {
2535         *ops++ = Op_force_opaque;
2536     } else if (dstAlpha == skcms_AlphaFormat_PremulAsEncoded) {
2537         *ops++ = Op_premul;
2538     }
2539     if (dstFmt & 1) {
2540         *ops++ = Op_swap_rb;
2541     }
2542     switch (dstFmt >> 1) {
2543         default: return false;
2544         case skcms_PixelFormat_A_8             >> 1: *ops++ = Op_store_a8;         break;
2545         case skcms_PixelFormat_G_8             >> 1: *ops++ = Op_store_g8;         break;
2546         case skcms_PixelFormat_ABGR_4444       >> 1: *ops++ = Op_store_4444;       break;
2547         case skcms_PixelFormat_RGB_565         >> 1: *ops++ = Op_store_565;        break;
2548         case skcms_PixelFormat_RGB_888         >> 1: *ops++ = Op_store_888;        break;
2549         case skcms_PixelFormat_RGBA_8888       >> 1: *ops++ = Op_store_8888;       break;
2550         case skcms_PixelFormat_RGBA_1010102    >> 1: *ops++ = Op_store_1010102;    break;
2551         case skcms_PixelFormat_RGB_161616LE    >> 1: *ops++ = Op_store_161616LE;   break;
2552         case skcms_PixelFormat_RGBA_16161616LE >> 1: *ops++ = Op_store_16161616LE; break;
2553         case skcms_PixelFormat_RGB_161616BE    >> 1: *ops++ = Op_store_161616BE;   break;
2554         case skcms_PixelFormat_RGBA_16161616BE >> 1: *ops++ = Op_store_16161616BE; break;
2555         case skcms_PixelFormat_RGB_hhh_Norm    >> 1: *ops++ = Op_store_hhh;        break;
2556         case skcms_PixelFormat_RGBA_hhhh_Norm  >> 1: *ops++ = Op_store_hhhh;       break;
2557         case skcms_PixelFormat_RGB_hhh         >> 1: *ops++ = Op_store_hhh;        break;
2558         case skcms_PixelFormat_RGBA_hhhh       >> 1: *ops++ = Op_store_hhhh;       break;
2559         case skcms_PixelFormat_RGB_fff         >> 1: *ops++ = Op_store_fff;        break;
2560         case skcms_PixelFormat_RGBA_ffff       >> 1: *ops++ = Op_store_ffff;       break;
2561 
2562         case skcms_PixelFormat_RGBA_8888_sRGB >> 1:
2563             *ops++ = Op_tf_r;       *args++ = skcms_sRGB_Inverse_TransferFunction();
2564             *ops++ = Op_tf_g;       *args++ = skcms_sRGB_Inverse_TransferFunction();
2565             *ops++ = Op_tf_b;       *args++ = skcms_sRGB_Inverse_TransferFunction();
2566             *ops++ = Op_store_8888;
2567             break;
2568     }
2569 
2570     auto run = baseline::run_program;
2571 #if defined(TEST_FOR_HSW)
2572     switch (cpu_type()) {
2573         case CpuType::None:                        break;
2574         case CpuType::HSW: run = hsw::run_program; break;
2575         case CpuType::SKX: run = hsw::run_program; break;
2576     }
2577 #endif
2578 #if defined(TEST_FOR_SKX)
2579     switch (cpu_type()) {
2580         case CpuType::None:                        break;
2581         case CpuType::HSW:                         break;
2582         case CpuType::SKX: run = skx::run_program; break;
2583     }
2584 #endif
2585     run(program, arguments, (const char*)src, (char*)dst, n, src_bpp,dst_bpp);
2586     return true;
2587 }
2588 
assert_usable_as_destination(const skcms_ICCProfile * profile)2589 static void assert_usable_as_destination(const skcms_ICCProfile* profile) {
2590 #if defined(NDEBUG)
2591     (void)profile;
2592 #else
2593     skcms_Matrix3x3 fromXYZD50;
2594     skcms_TransferFunction invR, invG, invB;
2595     assert(prep_for_destination(profile, &fromXYZD50, &invR, &invG, &invB));
2596 #endif
2597 }
2598 
skcms_MakeUsableAsDestination(skcms_ICCProfile * profile)2599 bool skcms_MakeUsableAsDestination(skcms_ICCProfile* profile) {
2600     skcms_Matrix3x3 fromXYZD50;
2601     if (!profile->has_trc || !profile->has_toXYZD50
2602         || !skcms_Matrix3x3_invert(&profile->toXYZD50, &fromXYZD50)) {
2603         return false;
2604     }
2605 
2606     skcms_TransferFunction tf[3];
2607     for (int i = 0; i < 3; i++) {
2608         skcms_TransferFunction inv;
2609         if (profile->trc[i].table_entries == 0
2610             && skcms_TransferFunction_invert(&profile->trc[i].parametric, &inv)) {
2611             tf[i] = profile->trc[i].parametric;
2612             continue;
2613         }
2614 
2615         float max_error;
2616         // Parametric curves from skcms_ApproximateCurve() are guaranteed to be invertible.
2617         if (!skcms_ApproximateCurve(&profile->trc[i], &tf[i], &max_error)) {
2618             return false;
2619         }
2620     }
2621 
2622     for (int i = 0; i < 3; ++i) {
2623         profile->trc[i].table_entries = 0;
2624         profile->trc[i].parametric = tf[i];
2625     }
2626 
2627     assert_usable_as_destination(profile);
2628     return true;
2629 }
2630 
skcms_MakeUsableAsDestinationWithSingleCurve(skcms_ICCProfile * profile)2631 bool skcms_MakeUsableAsDestinationWithSingleCurve(skcms_ICCProfile* profile) {
2632     // Operate on a copy of profile, so we can choose the best TF for the original curves
2633     skcms_ICCProfile result = *profile;
2634     if (!skcms_MakeUsableAsDestination(&result)) {
2635         return false;
2636     }
2637 
2638     int best_tf = 0;
2639     float min_max_error = INFINITY_;
2640     for (int i = 0; i < 3; i++) {
2641         skcms_TransferFunction inv;
2642         if (!skcms_TransferFunction_invert(&result.trc[i].parametric, &inv)) {
2643             return false;
2644         }
2645 
2646         float err = 0;
2647         for (int j = 0; j < 3; ++j) {
2648             err = fmaxf_(err, skcms_MaxRoundtripError(&profile->trc[j], &inv));
2649         }
2650         if (min_max_error > err) {
2651             min_max_error = err;
2652             best_tf = i;
2653         }
2654     }
2655 
2656     for (int i = 0; i < 3; i++) {
2657         result.trc[i].parametric = result.trc[best_tf].parametric;
2658     }
2659 
2660     *profile = result;
2661     assert_usable_as_destination(profile);
2662     return true;
2663 }
2664