1 /*
2  * Copyright 2018 Google Inc.
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #include "skcms.h"
9 #include "skcms_internal.h"
10 #include <assert.h>
11 #include <float.h>
12 #include <limits.h>
13 #include <stdlib.h>
14 #include <string.h>
15 
16 #if defined(__ARM_NEON)
17     #include <arm_neon.h>
18 #elif defined(__SSE__)
19     #include <immintrin.h>
20 
21     #if defined(__clang__)
22         // That #include <immintrin.h> is usually enough, but Clang's headers
23         // "helpfully" skip including the whole kitchen sink when _MSC_VER is
24         // defined, because lots of programs on Windows would include that and
25         // it'd be a lot slower.  But we want all those headers included so we
26         // can use their features after runtime checks later.
27         #include <smmintrin.h>
28         #include <avxintrin.h>
29         #include <avx2intrin.h>
30         #include <avx512fintrin.h>
31         #include <avx512dqintrin.h>
32     #endif
33 #endif
34 
35 // sizeof(x) will return size_t, which is 32-bit on some machines and 64-bit on others.
36 // We have better testing on 64-bit machines, so force 32-bit machines to behave like 64-bit.
37 //
38 // Please do not use sizeof() directly, and size_t only when required.
39 // (We have no way of enforcing these requests...)
40 #define SAFE_SIZEOF(x) ((uint64_t)sizeof(x))
41 
42 // Same sort of thing for _Layout structs with a variable sized array at the end (named "variable").
43 #define SAFE_FIXED_SIZE(type) ((uint64_t)offsetof(type, variable))
44 
45 static const union {
46     uint32_t bits;
47     float    f;
48 } inf_ = { 0x7f800000 };
49 #define INFINITY_ inf_.f
50 
51 #if defined(__clang__) || defined(__GNUC__)
52     #define small_memcpy __builtin_memcpy
53 #else
54     #define small_memcpy memcpy
55 #endif
56 
log2f_(float x)57 static float log2f_(float x) {
58     // The first approximation of log2(x) is its exponent 'e', minus 127.
59     int32_t bits;
60     small_memcpy(&bits, &x, sizeof(bits));
61 
62     float e = (float)bits * (1.0f / (1<<23));
63 
64     // If we use the mantissa too we can refine the error signficantly.
65     int32_t m_bits = (bits & 0x007fffff) | 0x3f000000;
66     float m;
67     small_memcpy(&m, &m_bits, sizeof(m));
68 
69     return (e - 124.225514990f
70               -   1.498030302f*m
71               -   1.725879990f/(0.3520887068f + m));
72 }
logf_(float x)73 static float logf_(float x) {
74     const float ln2 = 0.69314718f;
75     return ln2*log2f_(x);
76 }
77 
exp2f_(float x)78 static float exp2f_(float x) {
79     float fract = x - floorf_(x);
80 
81     float fbits = (1.0f * (1<<23)) * (x + 121.274057500f
82                                         -   1.490129070f*fract
83                                         +  27.728023300f/(4.84252568f - fract));
84 
85     // Before we cast fbits to int32_t, check for out of range values to pacify UBSAN.
86     // INT_MAX is not exactly representable as a float, so exclude it as effectively infinite.
87     // INT_MIN is a power of 2 and exactly representable as a float, so it's fine.
88     if (fbits >= (float)INT_MAX) {
89         return INFINITY_;
90     } else if (fbits < (float)INT_MIN) {
91         return -INFINITY_;
92     }
93 
94     int32_t bits = (int32_t)fbits;
95     small_memcpy(&x, &bits, sizeof(x));
96     return x;
97 }
98 
99 // Not static, as it's used by some test tools.
powf_(float x,float y)100 float powf_(float x, float y) {
101     assert (x >= 0);
102     return (x == 0) || (x == 1) ? x
103                                 : exp2f_(log2f_(x) * y);
104 }
105 
expf_(float x)106 static float expf_(float x) {
107     const float log2_e = 1.4426950408889634074f;
108     return exp2f_(log2_e * x);
109 }
110 
fmaxf_(float x,float y)111 static float fmaxf_(float x, float y) { return x > y ? x : y; }
fminf_(float x,float y)112 static float fminf_(float x, float y) { return x < y ? x : y; }
113 
isfinitef_(float x)114 static bool isfinitef_(float x) { return 0 == x*0; }
115 
minus_1_ulp(float x)116 static float minus_1_ulp(float x) {
117     int32_t bits;
118     memcpy(&bits, &x, sizeof(bits));
119     bits = bits - 1;
120     memcpy(&x, &bits, sizeof(bits));
121     return x;
122 }
123 
124 // Most transfer functions we work with are sRGBish.
125 // For exotic HDR transfer functions, we encode them using a tf.g that makes no sense,
126 // and repurpose the other fields to hold the parameters of the HDR functions.
127 enum TFKind { Bad, sRGBish, PQish, HLGish, HLGinvish };
128 struct TF_PQish  { float A,B,C,D,E,F; };
129 struct TF_HLGish { float R,G,a,b,c; };
130 
TFKind_marker(TFKind kind)131 static float TFKind_marker(TFKind kind) {
132     // We'd use different NaNs, but those aren't guaranteed to be preserved by WASM.
133     return -(float)kind;
134 }
135 
classify(const skcms_TransferFunction & tf,TF_PQish * pq=nullptr,TF_HLGish * hlg=nullptr)136 static TFKind classify(const skcms_TransferFunction& tf, TF_PQish*   pq = nullptr
137                                                        , TF_HLGish* hlg = nullptr) {
138     if (tf.g < 0 && (int)tf.g == tf.g) {
139         // TODO: sanity checks for PQ/HLG like we do for sRGBish.
140         switch ((int)tf.g) {
141             case -PQish:     if (pq ) { memcpy(pq , &tf.a, sizeof(*pq )); } return PQish;
142             case -HLGish:    if (hlg) { memcpy(hlg, &tf.a, sizeof(*hlg)); } return HLGish;
143             case -HLGinvish: if (hlg) { memcpy(hlg, &tf.a, sizeof(*hlg)); } return HLGinvish;
144         }
145         return Bad;
146     }
147 
148     // Basic sanity checks for sRGBish transfer functions.
149     if (isfinitef_(tf.a + tf.b + tf.c + tf.d + tf.e + tf.f + tf.g)
150             // a,c,d,g should be non-negative to make any sense.
151             && tf.a >= 0
152             && tf.c >= 0
153             && tf.d >= 0
154             && tf.g >= 0
155             // Raising a negative value to a fractional tf->g produces complex numbers.
156             && tf.a * tf.d + tf.b >= 0) {
157         return sRGBish;
158     }
159 
160     return Bad;
161 }
162 
skcms_TransferFunction_makePQish(skcms_TransferFunction * tf,float A,float B,float C,float D,float E,float F)163 bool skcms_TransferFunction_makePQish(skcms_TransferFunction* tf,
164                                       float A, float B, float C,
165                                       float D, float E, float F) {
166     *tf = { TFKind_marker(PQish), A,B,C,D,E,F };
167     assert(classify(*tf) == PQish);
168     return true;
169 }
170 
skcms_TransferFunction_makeHLGish(skcms_TransferFunction * tf,float R,float G,float a,float b,float c)171 bool skcms_TransferFunction_makeHLGish(skcms_TransferFunction* tf,
172                                        float R, float G,
173                                        float a, float b, float c) {
174     *tf = { TFKind_marker(HLGish), R,G, a,b,c, 0 };
175     assert(classify(*tf) == HLGish);
176     return true;
177 }
178 
skcms_TransferFunction_eval(const skcms_TransferFunction * tf,float x)179 float skcms_TransferFunction_eval(const skcms_TransferFunction* tf, float x) {
180     float sign = x < 0 ? -1.0f : 1.0f;
181     x *= sign;
182 
183     TF_PQish  pq;
184     TF_HLGish hlg;
185     switch (classify(*tf, &pq, &hlg)) {
186         case Bad:       break;
187 
188         case HLGish:    return sign * (x*hlg.R <= 1 ? powf_(x*hlg.R, hlg.G)
189                                                     : expf_((x-hlg.c)*hlg.a) + hlg.b);
190 
191         // skcms_TransferFunction_invert() inverts R, G, and a for HLGinvish so this math is fast.
192         case HLGinvish: return sign * (x <= 1 ? hlg.R * powf_(x, hlg.G)
193                                               : hlg.a * logf_(x - hlg.b) + hlg.c);
194 
195 
196         case sRGBish: return sign * (x < tf->d ?       tf->c * x + tf->f
197                                                : powf_(tf->a * x + tf->b, tf->g) + tf->e);
198 
199         case PQish: return sign * powf_(fmaxf_(pq.A + pq.B * powf_(x, pq.C), 0)
200                                             / (pq.D + pq.E * powf_(x, pq.C)),
201                                         pq.F);
202     }
203     return 0;
204 }
205 
206 
eval_curve(const skcms_Curve * curve,float x)207 static float eval_curve(const skcms_Curve* curve, float x) {
208     if (curve->table_entries == 0) {
209         return skcms_TransferFunction_eval(&curve->parametric, x);
210     }
211 
212     float ix = fmaxf_(0, fminf_(x, 1)) * (curve->table_entries - 1);
213     int   lo = (int)                   ix        ,
214           hi = (int)(float)minus_1_ulp(ix + 1.0f);
215     float t = ix - (float)lo;
216 
217     float l, h;
218     if (curve->table_8) {
219         l = curve->table_8[lo] * (1/255.0f);
220         h = curve->table_8[hi] * (1/255.0f);
221     } else {
222         uint16_t be_l, be_h;
223         memcpy(&be_l, curve->table_16 + 2*lo, 2);
224         memcpy(&be_h, curve->table_16 + 2*hi, 2);
225 #if __BIG_ENDIAN__
226         l = be_l * (1/65535.0f);
227         h = be_h * (1/65535.0f);
228 #else
229         uint16_t le_l = ((be_l << 8) | (be_l >> 8)) & 0xffff;
230         uint16_t le_h = ((be_h << 8) | (be_h >> 8)) & 0xffff;
231         l = le_l * (1/65535.0f);
232         h = le_h * (1/65535.0f);
233 #endif
234     }
235     return l + (h-l)*t;
236 }
237 
skcms_MaxRoundtripError(const skcms_Curve * curve,const skcms_TransferFunction * inv_tf)238 float skcms_MaxRoundtripError(const skcms_Curve* curve, const skcms_TransferFunction* inv_tf) {
239     uint32_t N = curve->table_entries > 256 ? curve->table_entries : 256;
240     const float dx = 1.0f / (N - 1);
241     float err = 0;
242     for (uint32_t i = 0; i < N; i++) {
243         float x = i * dx,
244               y = eval_curve(curve, x);
245         err = fmaxf_(err, fabsf_(x - skcms_TransferFunction_eval(inv_tf, y)));
246     }
247     return err;
248 }
249 
skcms_AreApproximateInverses(const skcms_Curve * curve,const skcms_TransferFunction * inv_tf)250 bool skcms_AreApproximateInverses(const skcms_Curve* curve, const skcms_TransferFunction* inv_tf) {
251     return skcms_MaxRoundtripError(curve, inv_tf) < (1/512.0f);
252 }
253 
254 // Additional ICC signature values that are only used internally
255 enum {
256     // File signature
257     skcms_Signature_acsp = 0x61637370,
258 
259     // Tag signatures
260     skcms_Signature_rTRC = 0x72545243,
261     skcms_Signature_gTRC = 0x67545243,
262     skcms_Signature_bTRC = 0x62545243,
263     skcms_Signature_kTRC = 0x6B545243,
264 
265     skcms_Signature_rXYZ = 0x7258595A,
266     skcms_Signature_gXYZ = 0x6758595A,
267     skcms_Signature_bXYZ = 0x6258595A,
268 
269     skcms_Signature_A2B0 = 0x41324230,
270     skcms_Signature_A2B1 = 0x41324231,
271     skcms_Signature_mAB  = 0x6D414220,
272 
273     skcms_Signature_CHAD = 0x63686164,
274     skcms_Signature_WTPT = 0x77747074,
275 
276     // Type signatures
277     skcms_Signature_curv = 0x63757276,
278     skcms_Signature_mft1 = 0x6D667431,
279     skcms_Signature_mft2 = 0x6D667432,
280     skcms_Signature_para = 0x70617261,
281     skcms_Signature_sf32 = 0x73663332,
282     // XYZ is also a PCS signature, so it's defined in skcms.h
283     // skcms_Signature_XYZ = 0x58595A20,
284 };
285 
read_big_u16(const uint8_t * ptr)286 static uint16_t read_big_u16(const uint8_t* ptr) {
287     uint16_t be;
288     memcpy(&be, ptr, sizeof(be));
289 #if __BIG_ENDIAN__
290     return be;
291 #else
292 #if defined(_MSC_VER)
293     return _byteswap_ushort(be);
294 #else
295     return __builtin_bswap16(be);
296 #endif
297 #endif
298 }
299 
read_big_u32(const uint8_t * ptr)300 static uint32_t read_big_u32(const uint8_t* ptr) {
301     uint32_t be;
302     memcpy(&be, ptr, sizeof(be));
303 #if __BIG_ENDIAN__
304     return be;
305 #else
306 #if defined(_MSC_VER)
307     return _byteswap_ulong(be);
308 #else
309     return __builtin_bswap32(be);
310 #endif
311 #endif
312 }
313 
read_big_i32(const uint8_t * ptr)314 static int32_t read_big_i32(const uint8_t* ptr) {
315     return (int32_t)read_big_u32(ptr);
316 }
317 
read_big_fixed(const uint8_t * ptr)318 static float read_big_fixed(const uint8_t* ptr) {
319     return read_big_i32(ptr) * (1.0f / 65536.0f);
320 }
321 
322 // Maps to an in-memory profile so that fields line up to the locations specified
323 // in ICC.1:2010, section 7.2
324 typedef struct {
325     uint8_t size                [ 4];
326     uint8_t cmm_type            [ 4];
327     uint8_t version             [ 4];
328     uint8_t profile_class       [ 4];
329     uint8_t data_color_space    [ 4];
330     uint8_t pcs                 [ 4];
331     uint8_t creation_date_time  [12];
332     uint8_t signature           [ 4];
333     uint8_t platform            [ 4];
334     uint8_t flags               [ 4];
335     uint8_t device_manufacturer [ 4];
336     uint8_t device_model        [ 4];
337     uint8_t device_attributes   [ 8];
338     uint8_t rendering_intent    [ 4];
339     uint8_t illuminant_X        [ 4];
340     uint8_t illuminant_Y        [ 4];
341     uint8_t illuminant_Z        [ 4];
342     uint8_t creator             [ 4];
343     uint8_t profile_id          [16];
344     uint8_t reserved            [28];
345     uint8_t tag_count           [ 4]; // Technically not part of header, but required
346 } header_Layout;
347 
348 typedef struct {
349     uint8_t signature [4];
350     uint8_t offset    [4];
351     uint8_t size      [4];
352 } tag_Layout;
353 
get_tag_table(const skcms_ICCProfile * profile)354 static const tag_Layout* get_tag_table(const skcms_ICCProfile* profile) {
355     return (const tag_Layout*)(profile->buffer + SAFE_SIZEOF(header_Layout));
356 }
357 
358 // s15Fixed16ArrayType is technically variable sized, holding N values. However, the only valid
359 // use of the type is for the CHAD tag that stores exactly nine values.
360 typedef struct {
361     uint8_t type     [ 4];
362     uint8_t reserved [ 4];
363     uint8_t values   [36];
364 } sf32_Layout;
365 
skcms_GetCHAD(const skcms_ICCProfile * profile,skcms_Matrix3x3 * m)366 bool skcms_GetCHAD(const skcms_ICCProfile* profile, skcms_Matrix3x3* m) {
367     skcms_ICCTag tag;
368     if (!skcms_GetTagBySignature(profile, skcms_Signature_CHAD, &tag)) {
369         return false;
370     }
371 
372     if (tag.type != skcms_Signature_sf32 || tag.size < SAFE_SIZEOF(sf32_Layout)) {
373         return false;
374     }
375 
376     const sf32_Layout* sf32Tag = (const sf32_Layout*)tag.buf;
377     const uint8_t* values = sf32Tag->values;
378     for (int r = 0; r < 3; ++r)
379     for (int c = 0; c < 3; ++c, values += 4) {
380         m->vals[r][c] = read_big_fixed(values);
381     }
382     return true;
383 }
384 
385 // XYZType is technically variable sized, holding N XYZ triples. However, the only valid uses of
386 // the type are for tags/data that store exactly one triple.
387 typedef struct {
388     uint8_t type     [4];
389     uint8_t reserved [4];
390     uint8_t X        [4];
391     uint8_t Y        [4];
392     uint8_t Z        [4];
393 } XYZ_Layout;
394 
read_tag_xyz(const skcms_ICCTag * tag,float * x,float * y,float * z)395 static bool read_tag_xyz(const skcms_ICCTag* tag, float* x, float* y, float* z) {
396     if (tag->type != skcms_Signature_XYZ || tag->size < SAFE_SIZEOF(XYZ_Layout)) {
397         return false;
398     }
399 
400     const XYZ_Layout* xyzTag = (const XYZ_Layout*)tag->buf;
401 
402     *x = read_big_fixed(xyzTag->X);
403     *y = read_big_fixed(xyzTag->Y);
404     *z = read_big_fixed(xyzTag->Z);
405     return true;
406 }
407 
skcms_GetWTPT(const skcms_ICCProfile * profile,float xyz[3])408 bool skcms_GetWTPT(const skcms_ICCProfile* profile, float xyz[3]) {
409     skcms_ICCTag tag;
410     return skcms_GetTagBySignature(profile, skcms_Signature_WTPT, &tag) &&
411            read_tag_xyz(&tag, &xyz[0], &xyz[1], &xyz[2]);
412 }
413 
read_to_XYZD50(const skcms_ICCTag * rXYZ,const skcms_ICCTag * gXYZ,const skcms_ICCTag * bXYZ,skcms_Matrix3x3 * toXYZ)414 static bool read_to_XYZD50(const skcms_ICCTag* rXYZ, const skcms_ICCTag* gXYZ,
415                            const skcms_ICCTag* bXYZ, skcms_Matrix3x3* toXYZ) {
416     return read_tag_xyz(rXYZ, &toXYZ->vals[0][0], &toXYZ->vals[1][0], &toXYZ->vals[2][0]) &&
417            read_tag_xyz(gXYZ, &toXYZ->vals[0][1], &toXYZ->vals[1][1], &toXYZ->vals[2][1]) &&
418            read_tag_xyz(bXYZ, &toXYZ->vals[0][2], &toXYZ->vals[1][2], &toXYZ->vals[2][2]);
419 }
420 
421 typedef struct {
422     uint8_t type          [4];
423     uint8_t reserved_a    [4];
424     uint8_t function_type [2];
425     uint8_t reserved_b    [2];
426     uint8_t variable      [1/*variable*/];  // 1, 3, 4, 5, or 7 s15.16, depending on function_type
427 } para_Layout;
428 
read_curve_para(const uint8_t * buf,uint32_t size,skcms_Curve * curve,uint32_t * curve_size)429 static bool read_curve_para(const uint8_t* buf, uint32_t size,
430                             skcms_Curve* curve, uint32_t* curve_size) {
431     if (size < SAFE_FIXED_SIZE(para_Layout)) {
432         return false;
433     }
434 
435     const para_Layout* paraTag = (const para_Layout*)buf;
436 
437     enum { kG = 0, kGAB = 1, kGABC = 2, kGABCD = 3, kGABCDEF = 4 };
438     uint16_t function_type = read_big_u16(paraTag->function_type);
439     if (function_type > kGABCDEF) {
440         return false;
441     }
442 
443     static const uint32_t curve_bytes[] = { 4, 12, 16, 20, 28 };
444     if (size < SAFE_FIXED_SIZE(para_Layout) + curve_bytes[function_type]) {
445         return false;
446     }
447 
448     if (curve_size) {
449         *curve_size = SAFE_FIXED_SIZE(para_Layout) + curve_bytes[function_type];
450     }
451 
452     curve->table_entries = 0;
453     curve->parametric.a  = 1.0f;
454     curve->parametric.b  = 0.0f;
455     curve->parametric.c  = 0.0f;
456     curve->parametric.d  = 0.0f;
457     curve->parametric.e  = 0.0f;
458     curve->parametric.f  = 0.0f;
459     curve->parametric.g  = read_big_fixed(paraTag->variable);
460 
461     switch (function_type) {
462         case kGAB:
463             curve->parametric.a = read_big_fixed(paraTag->variable + 4);
464             curve->parametric.b = read_big_fixed(paraTag->variable + 8);
465             if (curve->parametric.a == 0) {
466                 return false;
467             }
468             curve->parametric.d = -curve->parametric.b / curve->parametric.a;
469             break;
470         case kGABC:
471             curve->parametric.a = read_big_fixed(paraTag->variable + 4);
472             curve->parametric.b = read_big_fixed(paraTag->variable + 8);
473             curve->parametric.e = read_big_fixed(paraTag->variable + 12);
474             if (curve->parametric.a == 0) {
475                 return false;
476             }
477             curve->parametric.d = -curve->parametric.b / curve->parametric.a;
478             curve->parametric.f = curve->parametric.e;
479             break;
480         case kGABCD:
481             curve->parametric.a = read_big_fixed(paraTag->variable + 4);
482             curve->parametric.b = read_big_fixed(paraTag->variable + 8);
483             curve->parametric.c = read_big_fixed(paraTag->variable + 12);
484             curve->parametric.d = read_big_fixed(paraTag->variable + 16);
485             break;
486         case kGABCDEF:
487             curve->parametric.a = read_big_fixed(paraTag->variable + 4);
488             curve->parametric.b = read_big_fixed(paraTag->variable + 8);
489             curve->parametric.c = read_big_fixed(paraTag->variable + 12);
490             curve->parametric.d = read_big_fixed(paraTag->variable + 16);
491             curve->parametric.e = read_big_fixed(paraTag->variable + 20);
492             curve->parametric.f = read_big_fixed(paraTag->variable + 24);
493             break;
494     }
495     return classify(curve->parametric) == sRGBish;
496 }
497 
498 typedef struct {
499     uint8_t type          [4];
500     uint8_t reserved      [4];
501     uint8_t value_count   [4];
502     uint8_t variable      [1/*variable*/];  // value_count, 8.8 if 1, uint16 (n*65535) if > 1
503 } curv_Layout;
504 
read_curve_curv(const uint8_t * buf,uint32_t size,skcms_Curve * curve,uint32_t * curve_size)505 static bool read_curve_curv(const uint8_t* buf, uint32_t size,
506                             skcms_Curve* curve, uint32_t* curve_size) {
507     if (size < SAFE_FIXED_SIZE(curv_Layout)) {
508         return false;
509     }
510 
511     const curv_Layout* curvTag = (const curv_Layout*)buf;
512 
513     uint32_t value_count = read_big_u32(curvTag->value_count);
514     if (size < SAFE_FIXED_SIZE(curv_Layout) + value_count * SAFE_SIZEOF(uint16_t)) {
515         return false;
516     }
517 
518     if (curve_size) {
519         *curve_size = SAFE_FIXED_SIZE(curv_Layout) + value_count * SAFE_SIZEOF(uint16_t);
520     }
521 
522     if (value_count < 2) {
523         curve->table_entries = 0;
524         curve->parametric.a  = 1.0f;
525         curve->parametric.b  = 0.0f;
526         curve->parametric.c  = 0.0f;
527         curve->parametric.d  = 0.0f;
528         curve->parametric.e  = 0.0f;
529         curve->parametric.f  = 0.0f;
530         if (value_count == 0) {
531             // Empty tables are a shorthand for an identity curve
532             curve->parametric.g = 1.0f;
533         } else {
534             // Single entry tables are a shorthand for simple gamma
535             curve->parametric.g = read_big_u16(curvTag->variable) * (1.0f / 256.0f);
536         }
537     } else {
538         curve->table_8       = nullptr;
539         curve->table_16      = curvTag->variable;
540         curve->table_entries = value_count;
541     }
542 
543     return true;
544 }
545 
546 // Parses both curveType and parametricCurveType data. Ensures that at most 'size' bytes are read.
547 // If curve_size is not nullptr, writes the number of bytes used by the curve in (*curve_size).
read_curve(const uint8_t * buf,uint32_t size,skcms_Curve * curve,uint32_t * curve_size)548 static bool read_curve(const uint8_t* buf, uint32_t size,
549                        skcms_Curve* curve, uint32_t* curve_size) {
550     if (!buf || size < 4 || !curve) {
551         return false;
552     }
553 
554     uint32_t type = read_big_u32(buf);
555     if (type == skcms_Signature_para) {
556         return read_curve_para(buf, size, curve, curve_size);
557     } else if (type == skcms_Signature_curv) {
558         return read_curve_curv(buf, size, curve, curve_size);
559     }
560 
561     return false;
562 }
563 
564 // mft1 and mft2 share a large chunk of data
565 typedef struct {
566     uint8_t type                 [ 4];
567     uint8_t reserved_a           [ 4];
568     uint8_t input_channels       [ 1];
569     uint8_t output_channels      [ 1];
570     uint8_t grid_points          [ 1];
571     uint8_t reserved_b           [ 1];
572     uint8_t matrix               [36];
573 } mft_CommonLayout;
574 
575 typedef struct {
576     mft_CommonLayout common      [1];
577 
578     uint8_t variable             [1/*variable*/];
579 } mft1_Layout;
580 
581 typedef struct {
582     mft_CommonLayout common      [1];
583 
584     uint8_t input_table_entries  [2];
585     uint8_t output_table_entries [2];
586     uint8_t variable             [1/*variable*/];
587 } mft2_Layout;
588 
read_mft_common(const mft_CommonLayout * mftTag,skcms_A2B * a2b)589 static bool read_mft_common(const mft_CommonLayout* mftTag, skcms_A2B* a2b) {
590     // MFT matrices are applied before the first set of curves, but must be identity unless the
591     // input is PCSXYZ. We don't support PCSXYZ profiles, so we ignore this matrix. Note that the
592     // matrix in skcms_A2B is applied later in the pipe, so supporting this would require another
593     // field/flag.
594     a2b->matrix_channels = 0;
595 
596     a2b->input_channels  = mftTag->input_channels[0];
597     a2b->output_channels = mftTag->output_channels[0];
598 
599     // We require exactly three (ie XYZ/Lab/RGB) output channels
600     if (a2b->output_channels != ARRAY_COUNT(a2b->output_curves)) {
601         return false;
602     }
603     // We require at least one, and no more than four (ie CMYK) input channels
604     if (a2b->input_channels < 1 || a2b->input_channels > ARRAY_COUNT(a2b->input_curves)) {
605         return false;
606     }
607 
608     for (uint32_t i = 0; i < a2b->input_channels; ++i) {
609         a2b->grid_points[i] = mftTag->grid_points[0];
610     }
611     // The grid only makes sense with at least two points along each axis
612     if (a2b->grid_points[0] < 2) {
613         return false;
614     }
615 
616     return true;
617 }
618 
init_a2b_tables(const uint8_t * table_base,uint64_t max_tables_len,uint32_t byte_width,uint32_t input_table_entries,uint32_t output_table_entries,skcms_A2B * a2b)619 static bool init_a2b_tables(const uint8_t* table_base, uint64_t max_tables_len, uint32_t byte_width,
620                             uint32_t input_table_entries, uint32_t output_table_entries,
621                             skcms_A2B* a2b) {
622     // byte_width is 1 or 2, [input|output]_table_entries are in [2, 4096], so no overflow
623     uint32_t byte_len_per_input_table  = input_table_entries * byte_width;
624     uint32_t byte_len_per_output_table = output_table_entries * byte_width;
625 
626     // [input|output]_channels are <= 4, so still no overflow
627     uint32_t byte_len_all_input_tables  = a2b->input_channels * byte_len_per_input_table;
628     uint32_t byte_len_all_output_tables = a2b->output_channels * byte_len_per_output_table;
629 
630     uint64_t grid_size = a2b->output_channels * byte_width;
631     for (uint32_t axis = 0; axis < a2b->input_channels; ++axis) {
632         grid_size *= a2b->grid_points[axis];
633     }
634 
635     if (max_tables_len < byte_len_all_input_tables + grid_size + byte_len_all_output_tables) {
636         return false;
637     }
638 
639     for (uint32_t i = 0; i < a2b->input_channels; ++i) {
640         a2b->input_curves[i].table_entries = input_table_entries;
641         if (byte_width == 1) {
642             a2b->input_curves[i].table_8  = table_base + i * byte_len_per_input_table;
643             a2b->input_curves[i].table_16 = nullptr;
644         } else {
645             a2b->input_curves[i].table_8  = nullptr;
646             a2b->input_curves[i].table_16 = table_base + i * byte_len_per_input_table;
647         }
648     }
649 
650     if (byte_width == 1) {
651         a2b->grid_8  = table_base + byte_len_all_input_tables;
652         a2b->grid_16 = nullptr;
653     } else {
654         a2b->grid_8  = nullptr;
655         a2b->grid_16 = table_base + byte_len_all_input_tables;
656     }
657 
658     const uint8_t* output_table_base = table_base + byte_len_all_input_tables + grid_size;
659     for (uint32_t i = 0; i < a2b->output_channels; ++i) {
660         a2b->output_curves[i].table_entries = output_table_entries;
661         if (byte_width == 1) {
662             a2b->output_curves[i].table_8  = output_table_base + i * byte_len_per_output_table;
663             a2b->output_curves[i].table_16 = nullptr;
664         } else {
665             a2b->output_curves[i].table_8  = nullptr;
666             a2b->output_curves[i].table_16 = output_table_base + i * byte_len_per_output_table;
667         }
668     }
669 
670     return true;
671 }
672 
read_tag_mft1(const skcms_ICCTag * tag,skcms_A2B * a2b)673 static bool read_tag_mft1(const skcms_ICCTag* tag, skcms_A2B* a2b) {
674     if (tag->size < SAFE_FIXED_SIZE(mft1_Layout)) {
675         return false;
676     }
677 
678     const mft1_Layout* mftTag = (const mft1_Layout*)tag->buf;
679     if (!read_mft_common(mftTag->common, a2b)) {
680         return false;
681     }
682 
683     uint32_t input_table_entries  = 256;
684     uint32_t output_table_entries = 256;
685 
686     return init_a2b_tables(mftTag->variable, tag->size - SAFE_FIXED_SIZE(mft1_Layout), 1,
687                            input_table_entries, output_table_entries, a2b);
688 }
689 
read_tag_mft2(const skcms_ICCTag * tag,skcms_A2B * a2b)690 static bool read_tag_mft2(const skcms_ICCTag* tag, skcms_A2B* a2b) {
691     if (tag->size < SAFE_FIXED_SIZE(mft2_Layout)) {
692         return false;
693     }
694 
695     const mft2_Layout* mftTag = (const mft2_Layout*)tag->buf;
696     if (!read_mft_common(mftTag->common, a2b)) {
697         return false;
698     }
699 
700     uint32_t input_table_entries = read_big_u16(mftTag->input_table_entries);
701     uint32_t output_table_entries = read_big_u16(mftTag->output_table_entries);
702 
703     // ICC spec mandates that 2 <= table_entries <= 4096
704     if (input_table_entries < 2 || input_table_entries > 4096 ||
705         output_table_entries < 2 || output_table_entries > 4096) {
706         return false;
707     }
708 
709     return init_a2b_tables(mftTag->variable, tag->size - SAFE_FIXED_SIZE(mft2_Layout), 2,
710                            input_table_entries, output_table_entries, a2b);
711 }
712 
read_curves(const uint8_t * buf,uint32_t size,uint32_t curve_offset,uint32_t num_curves,skcms_Curve * curves)713 static bool read_curves(const uint8_t* buf, uint32_t size, uint32_t curve_offset,
714                         uint32_t num_curves, skcms_Curve* curves) {
715     for (uint32_t i = 0; i < num_curves; ++i) {
716         if (curve_offset > size) {
717             return false;
718         }
719 
720         uint32_t curve_bytes;
721         if (!read_curve(buf + curve_offset, size - curve_offset, &curves[i], &curve_bytes)) {
722             return false;
723         }
724 
725         if (curve_bytes > UINT32_MAX - 3) {
726             return false;
727         }
728         curve_bytes = (curve_bytes + 3) & ~3U;
729 
730         uint64_t new_offset_64 = (uint64_t)curve_offset + curve_bytes;
731         curve_offset = (uint32_t)new_offset_64;
732         if (new_offset_64 != curve_offset) {
733             return false;
734         }
735     }
736 
737     return true;
738 }
739 
740 typedef struct {
741     uint8_t type                 [ 4];
742     uint8_t reserved_a           [ 4];
743     uint8_t input_channels       [ 1];
744     uint8_t output_channels      [ 1];
745     uint8_t reserved_b           [ 2];
746     uint8_t b_curve_offset       [ 4];
747     uint8_t matrix_offset        [ 4];
748     uint8_t m_curve_offset       [ 4];
749     uint8_t clut_offset          [ 4];
750     uint8_t a_curve_offset       [ 4];
751 } mAB_Layout;
752 
753 typedef struct {
754     uint8_t grid_points          [16];
755     uint8_t grid_byte_width      [ 1];
756     uint8_t reserved             [ 3];
757     uint8_t variable             [1/*variable*/];
758 } mABCLUT_Layout;
759 
read_tag_mab(const skcms_ICCTag * tag,skcms_A2B * a2b,bool pcs_is_xyz)760 static bool read_tag_mab(const skcms_ICCTag* tag, skcms_A2B* a2b, bool pcs_is_xyz) {
761     if (tag->size < SAFE_SIZEOF(mAB_Layout)) {
762         return false;
763     }
764 
765     const mAB_Layout* mABTag = (const mAB_Layout*)tag->buf;
766 
767     a2b->input_channels  = mABTag->input_channels[0];
768     a2b->output_channels = mABTag->output_channels[0];
769 
770     // We require exactly three (ie XYZ/Lab/RGB) output channels
771     if (a2b->output_channels != ARRAY_COUNT(a2b->output_curves)) {
772         return false;
773     }
774     // We require no more than four (ie CMYK) input channels
775     if (a2b->input_channels > ARRAY_COUNT(a2b->input_curves)) {
776         return false;
777     }
778 
779     uint32_t b_curve_offset = read_big_u32(mABTag->b_curve_offset);
780     uint32_t matrix_offset  = read_big_u32(mABTag->matrix_offset);
781     uint32_t m_curve_offset = read_big_u32(mABTag->m_curve_offset);
782     uint32_t clut_offset    = read_big_u32(mABTag->clut_offset);
783     uint32_t a_curve_offset = read_big_u32(mABTag->a_curve_offset);
784 
785     // "B" curves must be present
786     if (0 == b_curve_offset) {
787         return false;
788     }
789 
790     if (!read_curves(tag->buf, tag->size, b_curve_offset, a2b->output_channels,
791                      a2b->output_curves)) {
792         return false;
793     }
794 
795     // "M" curves and Matrix must be used together
796     if (0 != m_curve_offset) {
797         if (0 == matrix_offset) {
798             return false;
799         }
800         a2b->matrix_channels = a2b->output_channels;
801         if (!read_curves(tag->buf, tag->size, m_curve_offset, a2b->matrix_channels,
802                          a2b->matrix_curves)) {
803             return false;
804         }
805 
806         // Read matrix, which is stored as a row-major 3x3, followed by the fourth column
807         if (tag->size < matrix_offset + 12 * SAFE_SIZEOF(uint32_t)) {
808             return false;
809         }
810         float encoding_factor = pcs_is_xyz ? 65535 / 32768.0f : 1.0f;
811         const uint8_t* mtx_buf = tag->buf + matrix_offset;
812         a2b->matrix.vals[0][0] = encoding_factor * read_big_fixed(mtx_buf + 0);
813         a2b->matrix.vals[0][1] = encoding_factor * read_big_fixed(mtx_buf + 4);
814         a2b->matrix.vals[0][2] = encoding_factor * read_big_fixed(mtx_buf + 8);
815         a2b->matrix.vals[1][0] = encoding_factor * read_big_fixed(mtx_buf + 12);
816         a2b->matrix.vals[1][1] = encoding_factor * read_big_fixed(mtx_buf + 16);
817         a2b->matrix.vals[1][2] = encoding_factor * read_big_fixed(mtx_buf + 20);
818         a2b->matrix.vals[2][0] = encoding_factor * read_big_fixed(mtx_buf + 24);
819         a2b->matrix.vals[2][1] = encoding_factor * read_big_fixed(mtx_buf + 28);
820         a2b->matrix.vals[2][2] = encoding_factor * read_big_fixed(mtx_buf + 32);
821         a2b->matrix.vals[0][3] = encoding_factor * read_big_fixed(mtx_buf + 36);
822         a2b->matrix.vals[1][3] = encoding_factor * read_big_fixed(mtx_buf + 40);
823         a2b->matrix.vals[2][3] = encoding_factor * read_big_fixed(mtx_buf + 44);
824     } else {
825         if (0 != matrix_offset) {
826             return false;
827         }
828         a2b->matrix_channels = 0;
829     }
830 
831     // "A" curves and CLUT must be used together
832     if (0 != a_curve_offset) {
833         if (0 == clut_offset) {
834             return false;
835         }
836         if (!read_curves(tag->buf, tag->size, a_curve_offset, a2b->input_channels,
837                          a2b->input_curves)) {
838             return false;
839         }
840 
841         if (tag->size < clut_offset + SAFE_FIXED_SIZE(mABCLUT_Layout)) {
842             return false;
843         }
844         const mABCLUT_Layout* clut = (const mABCLUT_Layout*)(tag->buf + clut_offset);
845 
846         if (clut->grid_byte_width[0] == 1) {
847             a2b->grid_8  = clut->variable;
848             a2b->grid_16 = nullptr;
849         } else if (clut->grid_byte_width[0] == 2) {
850             a2b->grid_8  = nullptr;
851             a2b->grid_16 = clut->variable;
852         } else {
853             return false;
854         }
855 
856         uint64_t grid_size = a2b->output_channels * clut->grid_byte_width[0];
857         for (uint32_t i = 0; i < a2b->input_channels; ++i) {
858             a2b->grid_points[i] = clut->grid_points[i];
859             // The grid only makes sense with at least two points along each axis
860             if (a2b->grid_points[i] < 2) {
861                 return false;
862             }
863             grid_size *= a2b->grid_points[i];
864         }
865         if (tag->size < clut_offset + SAFE_FIXED_SIZE(mABCLUT_Layout) + grid_size) {
866             return false;
867         }
868     } else {
869         if (0 != clut_offset) {
870             return false;
871         }
872 
873         // If there is no CLUT, the number of input and output channels must match
874         if (a2b->input_channels != a2b->output_channels) {
875             return false;
876         }
877 
878         // Zero out the number of input channels to signal that we're skipping this stage
879         a2b->input_channels = 0;
880     }
881 
882     return true;
883 }
884 
885 // If you pass f, we'll fit a possibly-non-zero value for *f.
886 // If you pass nullptr, we'll assume you want *f to be treated as zero.
fit_linear(const skcms_Curve * curve,int N,float tol,float * c,float * d,float * f=nullptr)887 static int fit_linear(const skcms_Curve* curve, int N, float tol,
888                       float* c, float* d, float* f = nullptr) {
889     assert(N > 1);
890     // We iteratively fit the first points to the TF's linear piece.
891     // We want the cx + f line to pass through the first and last points we fit exactly.
892     //
893     // As we walk along the points we find the minimum and maximum slope of the line before the
894     // error would exceed our tolerance.  We stop when the range [slope_min, slope_max] becomes
895     // emtpy, when we definitely can't add any more points.
896     //
897     // Some points' error intervals may intersect the running interval but not lie fully
898     // within it.  So we keep track of the last point we saw that is a valid end point candidate,
899     // and once the search is done, back up to build the line through *that* point.
900     const float dx = 1.0f / (N - 1);
901 
902     int lin_points = 1;
903 
904     float f_zero = 0.0f;
905     if (f) {
906         *f = eval_curve(curve, 0);
907     } else {
908         f = &f_zero;
909     }
910 
911 
912     float slope_min = -INFINITY_;
913     float slope_max = +INFINITY_;
914     for (int i = 1; i < N; ++i) {
915         float x = i * dx;
916         float y = eval_curve(curve, x);
917 
918         float slope_max_i = (y + tol - *f) / x,
919               slope_min_i = (y - tol - *f) / x;
920         if (slope_max_i < slope_min || slope_max < slope_min_i) {
921             // Slope intervals would no longer overlap.
922             break;
923         }
924         slope_max = fminf_(slope_max, slope_max_i);
925         slope_min = fmaxf_(slope_min, slope_min_i);
926 
927         float cur_slope = (y - *f) / x;
928         if (slope_min <= cur_slope && cur_slope <= slope_max) {
929             lin_points = i + 1;
930             *c = cur_slope;
931         }
932     }
933 
934     // Set D to the last point that met our tolerance.
935     *d = (lin_points - 1) * dx;
936     return lin_points;
937 }
938 
read_a2b(const skcms_ICCTag * tag,skcms_A2B * a2b,bool pcs_is_xyz)939 static bool read_a2b(const skcms_ICCTag* tag, skcms_A2B* a2b, bool pcs_is_xyz) {
940     bool ok = false;
941     if (tag->type == skcms_Signature_mft1) {
942         ok = read_tag_mft1(tag, a2b);
943     } else if (tag->type == skcms_Signature_mft2) {
944         ok = read_tag_mft2(tag, a2b);
945     } else if (tag->type == skcms_Signature_mAB) {
946         ok = read_tag_mab(tag, a2b, pcs_is_xyz);
947     }
948     if (!ok) {
949         return false;
950     }
951 
952     // Detect and canonicalize identity tables.
953     skcms_Curve* curves[] = {
954         a2b->input_channels  > 0 ? a2b->input_curves  + 0 : nullptr,
955         a2b->input_channels  > 1 ? a2b->input_curves  + 1 : nullptr,
956         a2b->input_channels  > 2 ? a2b->input_curves  + 2 : nullptr,
957         a2b->input_channels  > 3 ? a2b->input_curves  + 3 : nullptr,
958         a2b->matrix_channels > 0 ? a2b->matrix_curves + 0 : nullptr,
959         a2b->matrix_channels > 1 ? a2b->matrix_curves + 1 : nullptr,
960         a2b->matrix_channels > 2 ? a2b->matrix_curves + 2 : nullptr,
961         a2b->output_channels > 0 ? a2b->output_curves + 0 : nullptr,
962         a2b->output_channels > 1 ? a2b->output_curves + 1 : nullptr,
963         a2b->output_channels > 2 ? a2b->output_curves + 2 : nullptr,
964     };
965 
966     for (int i = 0; i < ARRAY_COUNT(curves); i++) {
967         skcms_Curve* curve = curves[i];
968 
969         if (curve && curve->table_entries && curve->table_entries <= (uint32_t)INT_MAX) {
970             int N = (int)curve->table_entries;
971 
972             float c = 0.0f, d = 0.0f, f = 0.0f;
973             if (N == fit_linear(curve, N, 1.0f/(2*N), &c,&d,&f)
974                 && c == 1.0f
975                 && f == 0.0f) {
976                 curve->table_entries = 0;
977                 curve->table_8       = nullptr;
978                 curve->table_16      = nullptr;
979                 curve->parametric    = skcms_TransferFunction{1,1,0,0,0,0,0};
980             }
981         }
982     }
983 
984     return true;
985 }
986 
skcms_GetTagByIndex(const skcms_ICCProfile * profile,uint32_t idx,skcms_ICCTag * tag)987 void skcms_GetTagByIndex(const skcms_ICCProfile* profile, uint32_t idx, skcms_ICCTag* tag) {
988     if (!profile || !profile->buffer || !tag) { return; }
989     if (idx > profile->tag_count) { return; }
990     const tag_Layout* tags = get_tag_table(profile);
991     tag->signature = read_big_u32(tags[idx].signature);
992     tag->size      = read_big_u32(tags[idx].size);
993     tag->buf       = read_big_u32(tags[idx].offset) + profile->buffer;
994     tag->type      = read_big_u32(tag->buf);
995 }
996 
skcms_GetTagBySignature(const skcms_ICCProfile * profile,uint32_t sig,skcms_ICCTag * tag)997 bool skcms_GetTagBySignature(const skcms_ICCProfile* profile, uint32_t sig, skcms_ICCTag* tag) {
998     if (!profile || !profile->buffer || !tag) { return false; }
999     const tag_Layout* tags = get_tag_table(profile);
1000     for (uint32_t i = 0; i < profile->tag_count; ++i) {
1001         if (read_big_u32(tags[i].signature) == sig) {
1002             tag->signature = sig;
1003             tag->size      = read_big_u32(tags[i].size);
1004             tag->buf       = read_big_u32(tags[i].offset) + profile->buffer;
1005             tag->type      = read_big_u32(tag->buf);
1006             return true;
1007         }
1008     }
1009     return false;
1010 }
1011 
usable_as_src(const skcms_ICCProfile * profile)1012 static bool usable_as_src(const skcms_ICCProfile* profile) {
1013     return profile->has_A2B
1014        || (profile->has_trc && profile->has_toXYZD50);
1015 }
1016 
skcms_Parse(const void * buf,size_t len,skcms_ICCProfile * profile)1017 bool skcms_Parse(const void* buf, size_t len, skcms_ICCProfile* profile) {
1018     assert(SAFE_SIZEOF(header_Layout) == 132);
1019 
1020     if (!profile) {
1021         return false;
1022     }
1023     memset(profile, 0, SAFE_SIZEOF(*profile));
1024 
1025     if (len < SAFE_SIZEOF(header_Layout)) {
1026         return false;
1027     }
1028 
1029     // Byte-swap all header fields
1030     const header_Layout* header  = (const header_Layout*)buf;
1031     profile->buffer              = (const uint8_t*)buf;
1032     profile->size                = read_big_u32(header->size);
1033     uint32_t version             = read_big_u32(header->version);
1034     profile->data_color_space    = read_big_u32(header->data_color_space);
1035     profile->pcs                 = read_big_u32(header->pcs);
1036     uint32_t signature           = read_big_u32(header->signature);
1037     float illuminant_X           = read_big_fixed(header->illuminant_X);
1038     float illuminant_Y           = read_big_fixed(header->illuminant_Y);
1039     float illuminant_Z           = read_big_fixed(header->illuminant_Z);
1040     profile->tag_count           = read_big_u32(header->tag_count);
1041 
1042     // Validate signature, size (smaller than buffer, large enough to hold tag table),
1043     // and major version
1044     uint64_t tag_table_size = profile->tag_count * SAFE_SIZEOF(tag_Layout);
1045     if (signature != skcms_Signature_acsp ||
1046         profile->size > len ||
1047         profile->size < SAFE_SIZEOF(header_Layout) + tag_table_size ||
1048         (version >> 24) > 4) {
1049         return false;
1050     }
1051 
1052     // Validate that illuminant is D50 white
1053     if (fabsf_(illuminant_X - 0.9642f) > 0.0100f ||
1054         fabsf_(illuminant_Y - 1.0000f) > 0.0100f ||
1055         fabsf_(illuminant_Z - 0.8249f) > 0.0100f) {
1056         return false;
1057     }
1058 
1059     // Validate that all tag entries have sane offset + size
1060     const tag_Layout* tags = get_tag_table(profile);
1061     for (uint32_t i = 0; i < profile->tag_count; ++i) {
1062         uint32_t tag_offset = read_big_u32(tags[i].offset);
1063         uint32_t tag_size   = read_big_u32(tags[i].size);
1064         uint64_t tag_end    = (uint64_t)tag_offset + (uint64_t)tag_size;
1065         if (tag_size < 4 || tag_end > profile->size) {
1066             return false;
1067         }
1068     }
1069 
1070     if (profile->pcs != skcms_Signature_XYZ && profile->pcs != skcms_Signature_Lab) {
1071         return false;
1072     }
1073 
1074     bool pcs_is_xyz = profile->pcs == skcms_Signature_XYZ;
1075 
1076     // Pre-parse commonly used tags.
1077     skcms_ICCTag kTRC;
1078     if (profile->data_color_space == skcms_Signature_Gray &&
1079         skcms_GetTagBySignature(profile, skcms_Signature_kTRC, &kTRC)) {
1080         if (!read_curve(kTRC.buf, kTRC.size, &profile->trc[0], nullptr)) {
1081             // Malformed tag
1082             return false;
1083         }
1084         profile->trc[1] = profile->trc[0];
1085         profile->trc[2] = profile->trc[0];
1086         profile->has_trc = true;
1087 
1088         if (pcs_is_xyz) {
1089             profile->toXYZD50.vals[0][0] = illuminant_X;
1090             profile->toXYZD50.vals[1][1] = illuminant_Y;
1091             profile->toXYZD50.vals[2][2] = illuminant_Z;
1092             profile->has_toXYZD50 = true;
1093         }
1094     } else {
1095         skcms_ICCTag rTRC, gTRC, bTRC;
1096         if (skcms_GetTagBySignature(profile, skcms_Signature_rTRC, &rTRC) &&
1097             skcms_GetTagBySignature(profile, skcms_Signature_gTRC, &gTRC) &&
1098             skcms_GetTagBySignature(profile, skcms_Signature_bTRC, &bTRC)) {
1099             if (!read_curve(rTRC.buf, rTRC.size, &profile->trc[0], nullptr) ||
1100                 !read_curve(gTRC.buf, gTRC.size, &profile->trc[1], nullptr) ||
1101                 !read_curve(bTRC.buf, bTRC.size, &profile->trc[2], nullptr)) {
1102                 // Malformed TRC tags
1103                 return false;
1104             }
1105             profile->has_trc = true;
1106         }
1107 
1108         skcms_ICCTag rXYZ, gXYZ, bXYZ;
1109         if (skcms_GetTagBySignature(profile, skcms_Signature_rXYZ, &rXYZ) &&
1110             skcms_GetTagBySignature(profile, skcms_Signature_gXYZ, &gXYZ) &&
1111             skcms_GetTagBySignature(profile, skcms_Signature_bXYZ, &bXYZ)) {
1112             if (!read_to_XYZD50(&rXYZ, &gXYZ, &bXYZ, &profile->toXYZD50)) {
1113                 // Malformed XYZ tags
1114                 return false;
1115             }
1116             profile->has_toXYZD50 = true;
1117         }
1118     }
1119 
1120     skcms_ICCTag a2b_tag;
1121 
1122     // For now, we're preferring A2B0, like Skia does and the ICC spec tells us to.
1123     // TODO: prefer A2B1 (relative colormetric) over A2B0 (perceptual)?
1124     // This breaks with the ICC spec, but we think it's a good idea, given that TRC curves
1125     // and all our known users are thinking exclusively in terms of relative colormetric.
1126     const uint32_t sigs[] = { skcms_Signature_A2B0, skcms_Signature_A2B1 };
1127     for (int i = 0; i < ARRAY_COUNT(sigs); i++) {
1128         if (skcms_GetTagBySignature(profile, sigs[i], &a2b_tag)) {
1129             if (!read_a2b(&a2b_tag, &profile->A2B, pcs_is_xyz)) {
1130                 // Malformed A2B tag
1131                 return false;
1132             }
1133             profile->has_A2B = true;
1134             break;
1135         }
1136     }
1137 
1138     return usable_as_src(profile);
1139 }
1140 
1141 
skcms_sRGB_profile()1142 const skcms_ICCProfile* skcms_sRGB_profile() {
1143     static const skcms_ICCProfile sRGB_profile = {
1144         nullptr,               // buffer, moot here
1145 
1146         0,                     // size, moot here
1147         skcms_Signature_RGB,   // data_color_space
1148         skcms_Signature_XYZ,   // pcs
1149         0,                     // tag count, moot here
1150 
1151         // We choose to represent sRGB with its canonical transfer function,
1152         // and with its canonical XYZD50 gamut matrix.
1153         true,  // has_trc, followed by the 3 trc curves
1154         {
1155             {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
1156             {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
1157             {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
1158         },
1159 
1160         true,  // has_toXYZD50, followed by 3x3 toXYZD50 matrix
1161         {{
1162             { 0.436065674f, 0.385147095f, 0.143066406f },
1163             { 0.222488403f, 0.716873169f, 0.060607910f },
1164             { 0.013916016f, 0.097076416f, 0.714096069f },
1165         }},
1166 
1167         false, // has_A2B, followed by a2b itself which we don't care about.
1168         {
1169             0,
1170             {
1171                 {{0, {0,0, 0,0,0,0,0}}},
1172                 {{0, {0,0, 0,0,0,0,0}}},
1173                 {{0, {0,0, 0,0,0,0,0}}},
1174                 {{0, {0,0, 0,0,0,0,0}}},
1175             },
1176             {0,0,0,0},
1177             nullptr,
1178             nullptr,
1179 
1180             0,
1181             {
1182                 {{0, {0,0, 0,0,0,0,0}}},
1183                 {{0, {0,0, 0,0,0,0,0}}},
1184                 {{0, {0,0, 0,0,0,0,0}}},
1185             },
1186             {{
1187                 { 0,0,0,0 },
1188                 { 0,0,0,0 },
1189                 { 0,0,0,0 },
1190             }},
1191 
1192             0,
1193             {
1194                 {{0, {0,0, 0,0,0,0,0}}},
1195                 {{0, {0,0, 0,0,0,0,0}}},
1196                 {{0, {0,0, 0,0,0,0,0}}},
1197             },
1198         },
1199     };
1200     return &sRGB_profile;
1201 }
1202 
skcms_XYZD50_profile()1203 const skcms_ICCProfile* skcms_XYZD50_profile() {
1204     // Just like sRGB above, but with identity transfer functions and toXYZD50 matrix.
1205     static const skcms_ICCProfile XYZD50_profile = {
1206         nullptr,               // buffer, moot here
1207 
1208         0,                     // size, moot here
1209         skcms_Signature_RGB,   // data_color_space
1210         skcms_Signature_XYZ,   // pcs
1211         0,                     // tag count, moot here
1212 
1213         true,  // has_trc, followed by the 3 trc curves
1214         {
1215             {{0, {1,1, 0,0,0,0,0}}},
1216             {{0, {1,1, 0,0,0,0,0}}},
1217             {{0, {1,1, 0,0,0,0,0}}},
1218         },
1219 
1220         true,  // has_toXYZD50, followed by 3x3 toXYZD50 matrix
1221         {{
1222             { 1,0,0 },
1223             { 0,1,0 },
1224             { 0,0,1 },
1225         }},
1226 
1227         false, // has_A2B, followed by a2b itself which we don't care about.
1228         {
1229             0,
1230             {
1231                 {{0, {0,0, 0,0,0,0,0}}},
1232                 {{0, {0,0, 0,0,0,0,0}}},
1233                 {{0, {0,0, 0,0,0,0,0}}},
1234                 {{0, {0,0, 0,0,0,0,0}}},
1235             },
1236             {0,0,0,0},
1237             nullptr,
1238             nullptr,
1239 
1240             0,
1241             {
1242                 {{0, {0,0, 0,0,0,0,0}}},
1243                 {{0, {0,0, 0,0,0,0,0}}},
1244                 {{0, {0,0, 0,0,0,0,0}}},
1245             },
1246             {{
1247                 { 0,0,0,0 },
1248                 { 0,0,0,0 },
1249                 { 0,0,0,0 },
1250             }},
1251 
1252             0,
1253             {
1254                 {{0, {0,0, 0,0,0,0,0}}},
1255                 {{0, {0,0, 0,0,0,0,0}}},
1256                 {{0, {0,0, 0,0,0,0,0}}},
1257             },
1258         },
1259     };
1260 
1261     return &XYZD50_profile;
1262 }
1263 
skcms_sRGB_TransferFunction()1264 const skcms_TransferFunction* skcms_sRGB_TransferFunction() {
1265     return &skcms_sRGB_profile()->trc[0].parametric;
1266 }
1267 
skcms_sRGB_Inverse_TransferFunction()1268 const skcms_TransferFunction* skcms_sRGB_Inverse_TransferFunction() {
1269     static const skcms_TransferFunction sRGB_inv =
1270         {0.416666657f, 1.137283325f, -0.0f, 12.920000076f, 0.003130805f, -0.054969788f, -0.0f};
1271     return &sRGB_inv;
1272 }
1273 
skcms_Identity_TransferFunction()1274 const skcms_TransferFunction* skcms_Identity_TransferFunction() {
1275     static const skcms_TransferFunction identity = {1,1,0,0,0,0,0};
1276     return &identity;
1277 }
1278 
1279 const uint8_t skcms_252_random_bytes[] = {
1280     8, 179, 128, 204, 253, 38, 134, 184, 68, 102, 32, 138, 99, 39, 169, 215,
1281     119, 26, 3, 223, 95, 239, 52, 132, 114, 74, 81, 234, 97, 116, 244, 205, 30,
1282     154, 173, 12, 51, 159, 122, 153, 61, 226, 236, 178, 229, 55, 181, 220, 191,
1283     194, 160, 126, 168, 82, 131, 18, 180, 245, 163, 22, 246, 69, 235, 252, 57,
1284     108, 14, 6, 152, 240, 255, 171, 242, 20, 227, 177, 238, 96, 85, 16, 211,
1285     70, 200, 149, 155, 146, 127, 145, 100, 151, 109, 19, 165, 208, 195, 164,
1286     137, 254, 182, 248, 64, 201, 45, 209, 5, 147, 207, 210, 113, 162, 83, 225,
1287     9, 31, 15, 231, 115, 37, 58, 53, 24, 49, 197, 56, 120, 172, 48, 21, 214,
1288     129, 111, 11, 50, 187, 196, 34, 60, 103, 71, 144, 47, 203, 77, 80, 232,
1289     140, 222, 250, 206, 166, 247, 139, 249, 221, 72, 106, 27, 199, 117, 54,
1290     219, 135, 118, 40, 79, 41, 251, 46, 93, 212, 92, 233, 148, 28, 121, 63,
1291     123, 158, 105, 59, 29, 42, 143, 23, 0, 107, 176, 87, 104, 183, 156, 193,
1292     189, 90, 188, 65, 190, 17, 198, 7, 186, 161, 1, 124, 78, 125, 170, 133,
1293     174, 218, 67, 157, 75, 101, 89, 217, 62, 33, 141, 228, 25, 35, 91, 230, 4,
1294     2, 13, 73, 86, 167, 237, 84, 243, 44, 185, 66, 130, 110, 150, 142, 216, 88,
1295     112, 36, 224, 136, 202, 76, 94, 98, 175, 213
1296 };
1297 
skcms_ApproximatelyEqualProfiles(const skcms_ICCProfile * A,const skcms_ICCProfile * B)1298 bool skcms_ApproximatelyEqualProfiles(const skcms_ICCProfile* A, const skcms_ICCProfile* B) {
1299     // Test for exactly equal profiles first.
1300     if (A == B || 0 == memcmp(A,B, sizeof(skcms_ICCProfile))) {
1301         return true;
1302     }
1303 
1304     // For now this is the essentially the same strategy we use in test_only.c
1305     // for our skcms_Transform() smoke tests:
1306     //    1) transform A to XYZD50
1307     //    2) transform B to XYZD50
1308     //    3) return true if they're similar enough
1309     // Our current criterion in 3) is maximum 1 bit error per XYZD50 byte.
1310 
1311     // skcms_252_random_bytes are 252 of a random shuffle of all possible bytes.
1312     // 252 is evenly divisible by 3 and 4.  Only 192, 10, 241, and 43 are missing.
1313 
1314     if (A->data_color_space != B->data_color_space) {
1315         return false;
1316     }
1317 
1318     // Interpret as RGB_888 if data color space is RGB or GRAY, RGBA_8888 if CMYK.
1319     // TODO: working with RGBA_8888 either way is probably fastest.
1320     skcms_PixelFormat fmt = skcms_PixelFormat_RGB_888;
1321     size_t npixels = 84;
1322     if (A->data_color_space == skcms_Signature_CMYK) {
1323         fmt = skcms_PixelFormat_RGBA_8888;
1324         npixels = 63;
1325     }
1326 
1327     // TODO: if A or B is a known profile (skcms_sRGB_profile, skcms_XYZD50_profile),
1328     // use pre-canned results and skip that skcms_Transform() call?
1329     uint8_t dstA[252],
1330             dstB[252];
1331     if (!skcms_Transform(
1332                 skcms_252_random_bytes,     fmt, skcms_AlphaFormat_Unpremul, A,
1333                 dstA, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, skcms_XYZD50_profile(),
1334                 npixels)) {
1335         return false;
1336     }
1337     if (!skcms_Transform(
1338                 skcms_252_random_bytes,     fmt, skcms_AlphaFormat_Unpremul, B,
1339                 dstB, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, skcms_XYZD50_profile(),
1340                 npixels)) {
1341         return false;
1342     }
1343 
1344     // TODO: make sure this final check has reasonable codegen.
1345     for (size_t i = 0; i < 252; i++) {
1346         if (abs((int)dstA[i] - (int)dstB[i]) > 1) {
1347             return false;
1348         }
1349     }
1350     return true;
1351 }
1352 
skcms_TRCs_AreApproximateInverse(const skcms_ICCProfile * profile,const skcms_TransferFunction * inv_tf)1353 bool skcms_TRCs_AreApproximateInverse(const skcms_ICCProfile* profile,
1354                                       const skcms_TransferFunction* inv_tf) {
1355     if (!profile || !profile->has_trc) {
1356         return false;
1357     }
1358 
1359     return skcms_AreApproximateInverses(&profile->trc[0], inv_tf) &&
1360            skcms_AreApproximateInverses(&profile->trc[1], inv_tf) &&
1361            skcms_AreApproximateInverses(&profile->trc[2], inv_tf);
1362 }
1363 
is_zero_to_one(float x)1364 static bool is_zero_to_one(float x) {
1365     return 0 <= x && x <= 1;
1366 }
1367 
1368 typedef struct { float vals[3]; } skcms_Vector3;
1369 
mv_mul(const skcms_Matrix3x3 * m,const skcms_Vector3 * v)1370 static skcms_Vector3 mv_mul(const skcms_Matrix3x3* m, const skcms_Vector3* v) {
1371     skcms_Vector3 dst = {{0,0,0}};
1372     for (int row = 0; row < 3; ++row) {
1373         dst.vals[row] = m->vals[row][0] * v->vals[0]
1374                       + m->vals[row][1] * v->vals[1]
1375                       + m->vals[row][2] * v->vals[2];
1376     }
1377     return dst;
1378 }
1379 
skcms_AdaptToXYZD50(float wx,float wy,skcms_Matrix3x3 * toXYZD50)1380 bool skcms_AdaptToXYZD50(float wx, float wy,
1381                          skcms_Matrix3x3* toXYZD50) {
1382     if (!is_zero_to_one(wx) || !is_zero_to_one(wy) ||
1383         !toXYZD50) {
1384         return false;
1385     }
1386 
1387     // Assumes that Y is 1.0f.
1388     skcms_Vector3 wXYZ = { { wx / wy, 1, (1 - wx - wy) / wy } };
1389 
1390     // Now convert toXYZ matrix to toXYZD50.
1391     skcms_Vector3 wXYZD50 = { { 0.96422f, 1.0f, 0.82521f } };
1392 
1393     // Calculate the chromatic adaptation matrix.  We will use the Bradford method, thus
1394     // the matrices below.  The Bradford method is used by Adobe and is widely considered
1395     // to be the best.
1396     skcms_Matrix3x3 xyz_to_lms = {{
1397         {  0.8951f,  0.2664f, -0.1614f },
1398         { -0.7502f,  1.7135f,  0.0367f },
1399         {  0.0389f, -0.0685f,  1.0296f },
1400     }};
1401     skcms_Matrix3x3 lms_to_xyz = {{
1402         {  0.9869929f, -0.1470543f, 0.1599627f },
1403         {  0.4323053f,  0.5183603f, 0.0492912f },
1404         { -0.0085287f,  0.0400428f, 0.9684867f },
1405     }};
1406 
1407     skcms_Vector3 srcCone = mv_mul(&xyz_to_lms, &wXYZ);
1408     skcms_Vector3 dstCone = mv_mul(&xyz_to_lms, &wXYZD50);
1409 
1410     *toXYZD50 = {{
1411         { dstCone.vals[0] / srcCone.vals[0], 0, 0 },
1412         { 0, dstCone.vals[1] / srcCone.vals[1], 0 },
1413         { 0, 0, dstCone.vals[2] / srcCone.vals[2] },
1414     }};
1415     *toXYZD50 = skcms_Matrix3x3_concat(toXYZD50, &xyz_to_lms);
1416     *toXYZD50 = skcms_Matrix3x3_concat(&lms_to_xyz, toXYZD50);
1417 
1418     return true;
1419 }
1420 
skcms_PrimariesToXYZD50(float rx,float ry,float gx,float gy,float bx,float by,float wx,float wy,skcms_Matrix3x3 * toXYZD50)1421 bool skcms_PrimariesToXYZD50(float rx, float ry,
1422                              float gx, float gy,
1423                              float bx, float by,
1424                              float wx, float wy,
1425                              skcms_Matrix3x3* toXYZD50) {
1426     if (!is_zero_to_one(rx) || !is_zero_to_one(ry) ||
1427         !is_zero_to_one(gx) || !is_zero_to_one(gy) ||
1428         !is_zero_to_one(bx) || !is_zero_to_one(by) ||
1429         !is_zero_to_one(wx) || !is_zero_to_one(wy) ||
1430         !toXYZD50) {
1431         return false;
1432     }
1433 
1434     // First, we need to convert xy values (primaries) to XYZ.
1435     skcms_Matrix3x3 primaries = {{
1436         { rx, gx, bx },
1437         { ry, gy, by },
1438         { 1 - rx - ry, 1 - gx - gy, 1 - bx - by },
1439     }};
1440     skcms_Matrix3x3 primaries_inv;
1441     if (!skcms_Matrix3x3_invert(&primaries, &primaries_inv)) {
1442         return false;
1443     }
1444 
1445     // Assumes that Y is 1.0f.
1446     skcms_Vector3 wXYZ = { { wx / wy, 1, (1 - wx - wy) / wy } };
1447     skcms_Vector3 XYZ = mv_mul(&primaries_inv, &wXYZ);
1448 
1449     skcms_Matrix3x3 toXYZ = {{
1450         { XYZ.vals[0],           0,           0 },
1451         {           0, XYZ.vals[1],           0 },
1452         {           0,           0, XYZ.vals[2] },
1453     }};
1454     toXYZ = skcms_Matrix3x3_concat(&primaries, &toXYZ);
1455 
1456     skcms_Matrix3x3 DXtoD50;
1457     if (!skcms_AdaptToXYZD50(wx, wy, &DXtoD50)) {
1458         return false;
1459     }
1460 
1461     *toXYZD50 = skcms_Matrix3x3_concat(&DXtoD50, &toXYZ);
1462     return true;
1463 }
1464 
1465 
skcms_Matrix3x3_invert(const skcms_Matrix3x3 * src,skcms_Matrix3x3 * dst)1466 bool skcms_Matrix3x3_invert(const skcms_Matrix3x3* src, skcms_Matrix3x3* dst) {
1467     double a00 = src->vals[0][0],
1468            a01 = src->vals[1][0],
1469            a02 = src->vals[2][0],
1470            a10 = src->vals[0][1],
1471            a11 = src->vals[1][1],
1472            a12 = src->vals[2][1],
1473            a20 = src->vals[0][2],
1474            a21 = src->vals[1][2],
1475            a22 = src->vals[2][2];
1476 
1477     double b0 = a00*a11 - a01*a10,
1478            b1 = a00*a12 - a02*a10,
1479            b2 = a01*a12 - a02*a11,
1480            b3 = a20,
1481            b4 = a21,
1482            b5 = a22;
1483 
1484     double determinant = b0*b5
1485                        - b1*b4
1486                        + b2*b3;
1487 
1488     if (determinant == 0) {
1489         return false;
1490     }
1491 
1492     double invdet = 1.0 / determinant;
1493     if (invdet > +FLT_MAX || invdet < -FLT_MAX || !isfinitef_((float)invdet)) {
1494         return false;
1495     }
1496 
1497     b0 *= invdet;
1498     b1 *= invdet;
1499     b2 *= invdet;
1500     b3 *= invdet;
1501     b4 *= invdet;
1502     b5 *= invdet;
1503 
1504     dst->vals[0][0] = (float)( a11*b5 - a12*b4 );
1505     dst->vals[1][0] = (float)( a02*b4 - a01*b5 );
1506     dst->vals[2][0] = (float)(        +     b2 );
1507     dst->vals[0][1] = (float)( a12*b3 - a10*b5 );
1508     dst->vals[1][1] = (float)( a00*b5 - a02*b3 );
1509     dst->vals[2][1] = (float)(        -     b1 );
1510     dst->vals[0][2] = (float)( a10*b4 - a11*b3 );
1511     dst->vals[1][2] = (float)( a01*b3 - a00*b4 );
1512     dst->vals[2][2] = (float)(        +     b0 );
1513 
1514     for (int r = 0; r < 3; ++r)
1515     for (int c = 0; c < 3; ++c) {
1516         if (!isfinitef_(dst->vals[r][c])) {
1517             return false;
1518         }
1519     }
1520     return true;
1521 }
1522 
skcms_Matrix3x3_concat(const skcms_Matrix3x3 * A,const skcms_Matrix3x3 * B)1523 skcms_Matrix3x3 skcms_Matrix3x3_concat(const skcms_Matrix3x3* A, const skcms_Matrix3x3* B) {
1524     skcms_Matrix3x3 m = { { { 0,0,0 },{ 0,0,0 },{ 0,0,0 } } };
1525     for (int r = 0; r < 3; r++)
1526         for (int c = 0; c < 3; c++) {
1527             m.vals[r][c] = A->vals[r][0] * B->vals[0][c]
1528                          + A->vals[r][1] * B->vals[1][c]
1529                          + A->vals[r][2] * B->vals[2][c];
1530         }
1531     return m;
1532 }
1533 
1534 #if defined(__clang__)
1535     [[clang::no_sanitize("float-divide-by-zero")]]  // Checked for by classify() on the way out.
1536 #endif
skcms_TransferFunction_invert(const skcms_TransferFunction * src,skcms_TransferFunction * dst)1537 bool skcms_TransferFunction_invert(const skcms_TransferFunction* src, skcms_TransferFunction* dst) {
1538     TF_PQish  pq;
1539     TF_HLGish hlg;
1540     switch (classify(*src, &pq, &hlg)) {
1541         case Bad: return false;
1542         case sRGBish: break;  // handled below
1543 
1544         case PQish:
1545             *dst = { TFKind_marker(PQish), -pq.A,  pq.D, 1.0f/pq.F
1546                                          ,  pq.B, -pq.E, 1.0f/pq.C};
1547             return true;
1548 
1549         case HLGish:
1550             *dst = { TFKind_marker(HLGinvish), 1.0f/hlg.R, 1.0f/hlg.G
1551                                              , 1.0f/hlg.a, hlg.b, hlg.c, 0 };
1552             return true;
1553 
1554         case HLGinvish:
1555             *dst = { TFKind_marker(HLGish), 1.0f/hlg.R, 1.0f/hlg.G
1556                                           , 1.0f/hlg.a, hlg.b, hlg.c, 0 };
1557             return true;
1558     }
1559 
1560     assert (classify(*src) == sRGBish);
1561 
1562     // We're inverting this function, solving for x in terms of y.
1563     //   y = (cx + f)         x < d
1564     //       (ax + b)^g + e   x ≥ d
1565     // The inverse of this function can be expressed in the same piecewise form.
1566     skcms_TransferFunction inv = {0,0,0,0,0,0,0};
1567 
1568     // We'll start by finding the new threshold inv.d.
1569     // In principle we should be able to find that by solving for y at x=d from either side.
1570     // (If those two d values aren't the same, it's a discontinuous transfer function.)
1571     float d_l =       src->c * src->d + src->f,
1572           d_r = powf_(src->a * src->d + src->b, src->g) + src->e;
1573     if (fabsf_(d_l - d_r) > 1/512.0f) {
1574         return false;
1575     }
1576     inv.d = d_l;  // TODO(mtklein): better in practice to choose d_r?
1577 
1578     // When d=0, the linear section collapses to a point.  We leave c,d,f all zero in that case.
1579     if (inv.d > 0) {
1580         // Inverting the linear section is pretty straightfoward:
1581         //        y       = cx + f
1582         //        y - f   = cx
1583         //   (1/c)y - f/c = x
1584         inv.c =    1.0f/src->c;
1585         inv.f = -src->f/src->c;
1586     }
1587 
1588     // The interesting part is inverting the nonlinear section:
1589     //         y                = (ax + b)^g + e.
1590     //         y - e            = (ax + b)^g
1591     //        (y - e)^1/g       =  ax + b
1592     //        (y - e)^1/g - b   =  ax
1593     //   (1/a)(y - e)^1/g - b/a =   x
1594     //
1595     // To make that fit our form, we need to move the (1/a) term inside the exponentiation:
1596     //   let k = (1/a)^g
1597     //   (1/a)( y -  e)^1/g - b/a = x
1598     //        (ky - ke)^1/g - b/a = x
1599 
1600     float k = powf_(src->a, -src->g);  // (1/a)^g == a^-g
1601     inv.g = 1.0f / src->g;
1602     inv.a = k;
1603     inv.b = -k * src->e;
1604     inv.e = -src->b / src->a;
1605 
1606     // We need to enforce the same constraints here that we do when fitting a curve,
1607     // a >= 0 and ad+b >= 0.  These constraints are checked by classify(), so they're true
1608     // of the source function if we're here.
1609 
1610     // Just like when fitting the curve, there's really no way to rescue a < 0.
1611     if (inv.a < 0) {
1612         return false;
1613     }
1614     // On the other hand we can rescue an ad+b that's gone slightly negative here.
1615     if (inv.a * inv.d + inv.b < 0) {
1616         inv.b = -inv.a * inv.d;
1617     }
1618 
1619     // That should usually make classify(inv) == sRGBish true, but there are a couple situations
1620     // where we might still fail here, like non-finite parameter values.
1621     if (classify(inv) != sRGBish) {
1622         return false;
1623     }
1624 
1625     assert (inv.a >= 0);
1626     assert (inv.a * inv.d + inv.b >= 0);
1627 
1628     // Now in principle we're done.
1629     // But to preserve the valuable invariant inv(src(1.0f)) == 1.0f, we'll tweak
1630     // e or f of the inverse, depending on which segment contains src(1.0f).
1631     float s = skcms_TransferFunction_eval(src, 1.0f);
1632     if (!isfinitef_(s)) {
1633         return false;
1634     }
1635 
1636     float sign = s < 0 ? -1.0f : 1.0f;
1637     s *= sign;
1638     if (s < inv.d) {
1639         inv.f = 1.0f - sign * inv.c * s;
1640     } else {
1641         inv.e = 1.0f - sign * powf_(inv.a * s + inv.b, inv.g);
1642     }
1643 
1644     *dst = inv;
1645     return classify(*dst) == sRGBish;
1646 }
1647 
1648 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ //
1649 
1650 // From here below we're approximating an skcms_Curve with an skcms_TransferFunction{g,a,b,c,d,e,f}:
1651 //
1652 //   tf(x) =  cx + f          x < d
1653 //   tf(x) = (ax + b)^g + e   x ≥ d
1654 //
1655 // When fitting, we add the additional constraint that both pieces meet at d:
1656 //
1657 //   cd + f = (ad + b)^g + e
1658 //
1659 // Solving for e and folding it through gives an alternate formulation of the non-linear piece:
1660 //
1661 //   tf(x) =                           cx + f   x < d
1662 //   tf(x) = (ax + b)^g - (ad + b)^g + cd + f   x ≥ d
1663 //
1664 // Our overall strategy is then:
1665 //    For a couple tolerances,
1666 //       - fit_linear():    fit c,d,f iteratively to as many points as our tolerance allows
1667 //       - invert c,d,f
1668 //       - fit_nonlinear(): fit g,a,b using Gauss-Newton given those inverted c,d,f
1669 //                          (and by constraint, inverted e) to the inverse of the table.
1670 //    Return the parameters with least maximum error.
1671 //
1672 // To run Gauss-Newton to find g,a,b, we'll also need the gradient of the residuals
1673 // of round-trip f_inv(x), the inverse of the non-linear piece of f(x).
1674 //
1675 //    let y = Table(x)
1676 //    r(x) = x - f_inv(y)
1677 //
1678 //    ∂r/∂g = ln(ay + b)*(ay + b)^g
1679 //          - ln(ad + b)*(ad + b)^g
1680 //    ∂r/∂a = yg(ay + b)^(g-1)
1681 //          - dg(ad + b)^(g-1)
1682 //    ∂r/∂b =  g(ay + b)^(g-1)
1683 //          -  g(ad + b)^(g-1)
1684 
1685 // Return the residual of roundtripping skcms_Curve(x) through f_inv(y) with parameters P,
1686 // and fill out the gradient of the residual into dfdP.
rg_nonlinear(float x,const skcms_Curve * curve,const skcms_TransferFunction * tf,float dfdP[3])1687 static float rg_nonlinear(float x,
1688                           const skcms_Curve* curve,
1689                           const skcms_TransferFunction* tf,
1690                           float dfdP[3]) {
1691     const float y = eval_curve(curve, x);
1692 
1693     const float g = tf->g, a = tf->a, b = tf->b,
1694                 c = tf->c, d = tf->d, f = tf->f;
1695 
1696     const float Y = fmaxf_(a*y + b, 0.0f),
1697                 D =        a*d + b;
1698     assert (D >= 0);
1699 
1700     // The gradient.
1701     dfdP[0] = logf_(Y)*powf_(Y, g)
1702             - logf_(D)*powf_(D, g);
1703     dfdP[1] = y*g*powf_(Y, g-1)
1704             - d*g*powf_(D, g-1);
1705     dfdP[2] =   g*powf_(Y, g-1)
1706             -   g*powf_(D, g-1);
1707 
1708     // The residual.
1709     const float f_inv = powf_(Y, g)
1710                       - powf_(D, g)
1711                       + c*d + f;
1712     return x - f_inv;
1713 }
1714 
gauss_newton_step(const skcms_Curve * curve,skcms_TransferFunction * tf,float x0,float dx,int N)1715 static bool gauss_newton_step(const skcms_Curve* curve,
1716                                     skcms_TransferFunction* tf,
1717                               float x0, float dx, int N) {
1718     // We'll sample x from the range [x0,x1] (both inclusive) N times with even spacing.
1719     //
1720     // Let P = [ tf->g, tf->a, tf->b ] (the three terms that we're adjusting).
1721     //
1722     // We want to do P' = P + (Jf^T Jf)^-1 Jf^T r(P),
1723     //   where r(P) is the residual vector
1724     //   and Jf is the Jacobian matrix of f(), ∂r/∂P.
1725     //
1726     // Let's review the shape of each of these expressions:
1727     //   r(P)   is [N x 1], a column vector with one entry per value of x tested
1728     //   Jf     is [N x 3], a matrix with an entry for each (x,P) pair
1729     //   Jf^T   is [3 x N], the transpose of Jf
1730     //
1731     //   Jf^T Jf   is [3 x N] * [N x 3] == [3 x 3], a 3x3 matrix,
1732     //                                              and so is its inverse (Jf^T Jf)^-1
1733     //   Jf^T r(P) is [3 x N] * [N x 1] == [3 x 1], a column vector with the same shape as P
1734     //
1735     // Our implementation strategy to get to the final ∆P is
1736     //   1) evaluate Jf^T Jf,   call that lhs
1737     //   2) evaluate Jf^T r(P), call that rhs
1738     //   3) invert lhs
1739     //   4) multiply inverse lhs by rhs
1740     //
1741     // This is a friendly implementation strategy because we don't have to have any
1742     // buffers that scale with N, and equally nice don't have to perform any matrix
1743     // operations that are variable size.
1744     //
1745     // Other implementation strategies could trade this off, e.g. evaluating the
1746     // pseudoinverse of Jf ( (Jf^T Jf)^-1 Jf^T ) directly, then multiplying that by
1747     // the residuals.  That would probably require implementing singular value
1748     // decomposition, and would create a [3 x N] matrix to be multiplied by the
1749     // [N x 1] residual vector, but on the upside I think that'd eliminate the
1750     // possibility of this gauss_newton_step() function ever failing.
1751 
1752     // 0) start off with lhs and rhs safely zeroed.
1753     skcms_Matrix3x3 lhs = {{ {0,0,0}, {0,0,0}, {0,0,0} }};
1754     skcms_Vector3   rhs = {  {0,0,0} };
1755 
1756     // 1,2) evaluate lhs and evaluate rhs
1757     //   We want to evaluate Jf only once, but both lhs and rhs involve Jf^T,
1758     //   so we'll have to update lhs and rhs at the same time.
1759     for (int i = 0; i < N; i++) {
1760         float x = x0 + i*dx;
1761 
1762         float dfdP[3] = {0,0,0};
1763         float resid = rg_nonlinear(x,curve,tf, dfdP);
1764 
1765         for (int r = 0; r < 3; r++) {
1766             for (int c = 0; c < 3; c++) {
1767                 lhs.vals[r][c] += dfdP[r] * dfdP[c];
1768             }
1769             rhs.vals[r] += dfdP[r] * resid;
1770         }
1771     }
1772 
1773     // If any of the 3 P parameters are unused, this matrix will be singular.
1774     // Detect those cases and fix them up to indentity instead, so we can invert.
1775     for (int k = 0; k < 3; k++) {
1776         if (lhs.vals[0][k]==0 && lhs.vals[1][k]==0 && lhs.vals[2][k]==0 &&
1777             lhs.vals[k][0]==0 && lhs.vals[k][1]==0 && lhs.vals[k][2]==0) {
1778             lhs.vals[k][k] = 1;
1779         }
1780     }
1781 
1782     // 3) invert lhs
1783     skcms_Matrix3x3 lhs_inv;
1784     if (!skcms_Matrix3x3_invert(&lhs, &lhs_inv)) {
1785         return false;
1786     }
1787 
1788     // 4) multiply inverse lhs by rhs
1789     skcms_Vector3 dP = mv_mul(&lhs_inv, &rhs);
1790     tf->g += dP.vals[0];
1791     tf->a += dP.vals[1];
1792     tf->b += dP.vals[2];
1793     return isfinitef_(tf->g) && isfinitef_(tf->a) && isfinitef_(tf->b);
1794 }
1795 
max_roundtrip_error_checked(const skcms_Curve * curve,const skcms_TransferFunction * tf_inv)1796 static float max_roundtrip_error_checked(const skcms_Curve* curve,
1797                                          const skcms_TransferFunction* tf_inv) {
1798     skcms_TransferFunction tf;
1799     if (!skcms_TransferFunction_invert(tf_inv, &tf) || sRGBish != classify(tf)) {
1800         return INFINITY_;
1801     }
1802 
1803     skcms_TransferFunction tf_inv_again;
1804     if (!skcms_TransferFunction_invert(&tf, &tf_inv_again)) {
1805         return INFINITY_;
1806     }
1807 
1808     return skcms_MaxRoundtripError(curve, &tf_inv_again);
1809 }
1810 
1811 // Fit the points in [L,N) to the non-linear piece of tf, or return false if we can't.
fit_nonlinear(const skcms_Curve * curve,int L,int N,skcms_TransferFunction * tf)1812 static bool fit_nonlinear(const skcms_Curve* curve, int L, int N, skcms_TransferFunction* tf) {
1813     // This enforces a few constraints that are not modeled in gauss_newton_step()'s optimization.
1814     auto fixup_tf = [tf]() {
1815         // a must be non-negative. That ensures the function is monotonically increasing.
1816         // We don't really know how to fix up a if it goes negative.
1817         if (tf->a < 0) {
1818             return false;
1819         }
1820         // ad+b must be non-negative. That ensures we don't end up with complex numbers in powf.
1821         // We feel just barely not uneasy enough to tweak b so ad+b is zero in this case.
1822         if (tf->a * tf->d + tf->b < 0) {
1823             tf->b = -tf->a * tf->d;
1824         }
1825         assert (tf->a >= 0 &&
1826                 tf->a * tf->d + tf->b >= 0);
1827 
1828         // cd+f must be ~= (ad+b)^g+e. That ensures the function is continuous. We keep e as a free
1829         // parameter so we can guarantee this.
1830         tf->e =   tf->c*tf->d + tf->f
1831           - powf_(tf->a*tf->d + tf->b, tf->g);
1832 
1833         return true;
1834     };
1835 
1836     if (!fixup_tf()) {
1837         return false;
1838     }
1839 
1840     // No matter where we start, dx should always represent N even steps from 0 to 1.
1841     const float dx = 1.0f / (N-1);
1842 
1843     skcms_TransferFunction best_tf = *tf;
1844     float best_max_error = INFINITY_;
1845 
1846     // Need this or several curves get worse... *sigh*
1847     float init_error = max_roundtrip_error_checked(curve, tf);
1848     if (init_error < best_max_error) {
1849         best_max_error = init_error;
1850         best_tf = *tf;
1851     }
1852 
1853     // As far as we can tell, 1 Gauss-Newton step won't converge, and 3 steps is no better than 2.
1854     for (int j = 0; j < 8; j++) {
1855         if (!gauss_newton_step(curve, tf, L*dx, dx, N-L) || !fixup_tf()) {
1856             *tf = best_tf;
1857             return isfinitef_(best_max_error);
1858         }
1859 
1860         float max_error = max_roundtrip_error_checked(curve, tf);
1861         if (max_error < best_max_error) {
1862             best_max_error = max_error;
1863             best_tf = *tf;
1864         }
1865     }
1866 
1867     *tf = best_tf;
1868     return isfinitef_(best_max_error);
1869 }
1870 
skcms_ApproximateCurve(const skcms_Curve * curve,skcms_TransferFunction * approx,float * max_error)1871 bool skcms_ApproximateCurve(const skcms_Curve* curve,
1872                             skcms_TransferFunction* approx,
1873                             float* max_error) {
1874     if (!curve || !approx || !max_error) {
1875         return false;
1876     }
1877 
1878     if (curve->table_entries == 0) {
1879         // No point approximating an skcms_TransferFunction with an skcms_TransferFunction!
1880         return false;
1881     }
1882 
1883     if (curve->table_entries == 1 || curve->table_entries > (uint32_t)INT_MAX) {
1884         // We need at least two points, and must put some reasonable cap on the maximum number.
1885         return false;
1886     }
1887 
1888     int N = (int)curve->table_entries;
1889     const float dx = 1.0f / (N - 1);
1890 
1891     *max_error = INFINITY_;
1892     const float kTolerances[] = { 1.5f / 65535.0f, 1.0f / 512.0f };
1893     for (int t = 0; t < ARRAY_COUNT(kTolerances); t++) {
1894         skcms_TransferFunction tf,
1895                                tf_inv;
1896 
1897         // It's problematic to fit curves with non-zero f, so always force it to zero explicitly.
1898         tf.f = 0.0f;
1899         int L = fit_linear(curve, N, kTolerances[t], &tf.c, &tf.d);
1900 
1901         if (L == N) {
1902             // If the entire data set was linear, move the coefficients to the nonlinear portion
1903             // with G == 1.  This lets use a canonical representation with d == 0.
1904             tf.g = 1;
1905             tf.a = tf.c;
1906             tf.b = tf.f;
1907             tf.c = tf.d = tf.e = tf.f = 0;
1908         } else if (L == N - 1) {
1909             // Degenerate case with only two points in the nonlinear segment. Solve directly.
1910             tf.g = 1;
1911             tf.a = (eval_curve(curve, (N-1)*dx) -
1912                     eval_curve(curve, (N-2)*dx))
1913                  / dx;
1914             tf.b = eval_curve(curve, (N-2)*dx)
1915                  - tf.a * (N-2)*dx;
1916             tf.e = 0;
1917         } else {
1918             // Start by guessing a gamma-only curve through the midpoint.
1919             int mid = (L + N) / 2;
1920             float mid_x = mid / (N - 1.0f);
1921             float mid_y = eval_curve(curve, mid_x);
1922             tf.g = log2f_(mid_y) / log2f_(mid_x);
1923             tf.a = 1;
1924             tf.b = 0;
1925             tf.e =    tf.c*tf.d + tf.f
1926               - powf_(tf.a*tf.d + tf.b, tf.g);
1927 
1928 
1929             if (!skcms_TransferFunction_invert(&tf, &tf_inv) ||
1930                 !fit_nonlinear(curve, L,N, &tf_inv)) {
1931                 continue;
1932             }
1933 
1934             // We fit tf_inv, so calculate tf to keep in sync.
1935             // fit_nonlinear() should guarantee invertibility.
1936             if (!skcms_TransferFunction_invert(&tf_inv, &tf)) {
1937                 assert(false);
1938                 continue;
1939             }
1940         }
1941 
1942         // We'd better have a sane, sRGB-ish TF by now.
1943         // Other non-Bad TFs would be fine, but we know we've only ever tried to fit sRGBish;
1944         // anything else is just some accident of math and the way we pun tf.g as a type flag.
1945         // fit_nonlinear() should guarantee this, but the special cases may fail this test.
1946         if (sRGBish != classify(tf)) {
1947             continue;
1948         }
1949 
1950         // We find our error by roundtripping the table through tf_inv.
1951         //
1952         // (The most likely use case for this approximation is to be inverted and
1953         // used as the transfer function for a destination color space.)
1954         //
1955         // We've kept tf and tf_inv in sync above, but we can't guarantee that tf is
1956         // invertible, so re-verify that here (and use the new inverse for testing).
1957         // fit_nonlinear() should guarantee this, but the special cases that don't use
1958         // it may fail this test.
1959         if (!skcms_TransferFunction_invert(&tf, &tf_inv)) {
1960             continue;
1961         }
1962 
1963         float err = skcms_MaxRoundtripError(curve, &tf_inv);
1964         if (*max_error > err) {
1965             *max_error = err;
1966             *approx    = tf;
1967         }
1968     }
1969     return isfinitef_(*max_error);
1970 }
1971 
1972 // ~~~~ Impl. of skcms_Transform() ~~~~
1973 
1974 typedef enum {
1975     Op_load_a8,
1976     Op_load_g8,
1977     Op_load_8888_palette8,
1978     Op_load_4444,
1979     Op_load_565,
1980     Op_load_888,
1981     Op_load_8888,
1982     Op_load_1010102,
1983     Op_load_161616LE,
1984     Op_load_16161616LE,
1985     Op_load_161616BE,
1986     Op_load_16161616BE,
1987     Op_load_hhh,
1988     Op_load_hhhh,
1989     Op_load_fff,
1990     Op_load_ffff,
1991 
1992     Op_swap_rb,
1993     Op_clamp,
1994     Op_invert,
1995     Op_force_opaque,
1996     Op_premul,
1997     Op_unpremul,
1998     Op_matrix_3x3,
1999     Op_matrix_3x4,
2000     Op_lab_to_xyz,
2001 
2002     Op_tf_r,
2003     Op_tf_g,
2004     Op_tf_b,
2005     Op_tf_a,
2006 
2007     Op_pq_r,
2008     Op_pq_g,
2009     Op_pq_b,
2010     Op_pq_a,
2011 
2012     Op_hlg_r,
2013     Op_hlg_g,
2014     Op_hlg_b,
2015     Op_hlg_a,
2016 
2017     Op_hlginv_r,
2018     Op_hlginv_g,
2019     Op_hlginv_b,
2020     Op_hlginv_a,
2021 
2022     Op_table_r,
2023     Op_table_g,
2024     Op_table_b,
2025     Op_table_a,
2026 
2027     Op_clut,
2028 
2029     Op_store_a8,
2030     Op_store_g8,
2031     Op_store_4444,
2032     Op_store_565,
2033     Op_store_888,
2034     Op_store_8888,
2035     Op_store_1010102,
2036     Op_store_161616LE,
2037     Op_store_16161616LE,
2038     Op_store_161616BE,
2039     Op_store_16161616BE,
2040     Op_store_hhh,
2041     Op_store_hhhh,
2042     Op_store_fff,
2043     Op_store_ffff,
2044 } Op;
2045 
2046 #if defined(__clang__)
2047     template <int N, typename T> using Vec = T __attribute__((ext_vector_type(N)));
2048 #elif defined(__GNUC__)
2049     // For some reason GCC accepts this nonsense, but not the more straightforward version,
2050     //   template <int N, typename T> using Vec = T __attribute__((vector_size(N*sizeof(T))));
2051     template <int N, typename T>
2052     struct VecHelper { typedef T __attribute__((vector_size(N*sizeof(T)))) V; };
2053 
2054     template <int N, typename T> using Vec = typename VecHelper<N,T>::V;
2055 #endif
2056 
2057 // First, instantiate our default exec_ops() implementation using the default compiliation target.
2058 
2059 namespace baseline {
2060 #if defined(SKCMS_PORTABLE) || !(defined(__clang__) || defined(__GNUC__)) \
2061                             || (defined(__EMSCRIPTEN_major__) && !defined(__wasm_simd128__))
2062     #define N 1
2063     template <typename T> using V = T;
2064     using Color = float;
2065 #elif defined(__AVX512F__)
2066     #define N 16
2067     template <typename T> using V = Vec<N,T>;
2068     using Color = float;
2069 #elif defined(__AVX__)
2070     #define N 8
2071     template <typename T> using V = Vec<N,T>;
2072     using Color = float;
2073 #elif defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(SKCMS_OPT_INTO_NEON_FP16)
2074     #define N 8
2075     template <typename T> using V = Vec<N,T>;
2076     using Color = _Float16;
2077 #else
2078     #define N 4
2079     template <typename T> using V = Vec<N,T>;
2080     using Color = float;
2081 #endif
2082 
2083     #include "src/Transform_inl.h"
2084     #undef N
2085 }
2086 
2087 // Now, instantiate any other versions of run_program() we may want for runtime detection.
2088 #if !defined(SKCMS_PORTABLE) &&                           \
2089     !defined(SKCMS_NO_RUNTIME_CPU_DETECTION) &&           \
2090         ((defined(__APPLE__) && defined(__apple_build_version__) && __clang_major__ >= 9) || \
2091          (!defined(__apple_build_version__) && defined(__clang__) && __clang_major__ >= 5) || \
2092          (!defined(__clang__) && defined(__GNUC__)))      \
2093      && defined(__x86_64__)
2094 
2095     #if !defined(__AVX2__)
2096         #if defined(__clang__)
2097             #pragma clang attribute push(__attribute__((target("avx2,f16c"))), apply_to=function)
2098         #elif defined(__GNUC__)
2099             #pragma GCC push_options
2100             #pragma GCC target("avx2,f16c")
2101         #endif
2102 
2103         namespace hsw {
2104             #define USING_AVX
2105             #define USING_AVX_F16C
2106             #define USING_AVX2
2107             #define N 8
2108             template <typename T> using V = Vec<N,T>;
2109             using Color = float;
2110 
2111             #include "src/Transform_inl.h"
2112 
2113             // src/Transform_inl.h will undefine USING_* for us.
2114             #undef N
2115         }
2116 
2117         #if defined(__clang__)
2118             #pragma clang attribute pop
2119         #elif defined(__GNUC__)
2120             #pragma GCC pop_options
2121         #endif
2122 
2123         #define TEST_FOR_HSW
2124     #endif
2125 
2126     #if !defined(__AVX512F__)
2127         #if defined(__clang__)
2128             #pragma clang attribute push(__attribute__((target("avx512f,avx512dq,avx512cd,avx512bw,avx512vl"))), apply_to=function)
2129         #elif defined(__GNUC__)
2130             #pragma GCC push_options
2131             #pragma GCC target("avx512f,avx512dq,avx512cd,avx512bw,avx512vl")
2132         #endif
2133 
2134         namespace skx {
2135             #define USING_AVX512F
2136             #define N 16
2137             template <typename T> using V = Vec<N,T>;
2138             using Color = float;
2139 
2140             #include "src/Transform_inl.h"
2141 
2142             // src/Transform_inl.h will undefine USING_* for us.
2143             #undef N
2144         }
2145 
2146         #if defined(__clang__)
2147             #pragma clang attribute pop
2148         #elif defined(__GNUC__)
2149             #pragma GCC pop_options
2150         #endif
2151 
2152         #define TEST_FOR_SKX
2153     #endif
2154 
2155     #if defined(TEST_FOR_HSW) || defined(TEST_FOR_SKX)
2156         enum class CpuType { None, HSW, SKX };
cpu_type()2157         static CpuType cpu_type() {
2158             static const CpuType type = []{
2159                 // See http://www.sandpile.org/x86/cpuid.htm
2160 
2161                 // First, a basic cpuid(1) lets us check prerequisites for HSW, SKX.
2162                 uint32_t eax, ebx, ecx, edx;
2163                 __asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
2164                                              : "0"(1), "2"(0));
2165                 if ((edx & (1u<<25)) &&  // SSE
2166                     (edx & (1u<<26)) &&  // SSE2
2167                     (ecx & (1u<< 0)) &&  // SSE3
2168                     (ecx & (1u<< 9)) &&  // SSSE3
2169                     (ecx & (1u<<12)) &&  // FMA (N.B. not used, avoided even)
2170                     (ecx & (1u<<19)) &&  // SSE4.1
2171                     (ecx & (1u<<20)) &&  // SSE4.2
2172                     (ecx & (1u<<26)) &&  // XSAVE
2173                     (ecx & (1u<<27)) &&  // OSXSAVE
2174                     (ecx & (1u<<28)) &&  // AVX
2175                     (ecx & (1u<<29))) {  // F16C
2176 
2177                     // Call cpuid(7) to check for AVX2 and AVX-512 bits.
2178                     __asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
2179                                                  : "0"(7), "2"(0));
2180                     // eax from xgetbv(0) will tell us whether XMM, YMM, and ZMM state is saved.
2181                     uint32_t xcr0, dont_need_edx;
2182                     __asm__ __volatile__("xgetbv" : "=a"(xcr0), "=d"(dont_need_edx) : "c"(0));
2183 
2184                     if ((xcr0 & (1u<<1)) &&  // XMM register state saved?
2185                         (xcr0 & (1u<<2)) &&  // YMM register state saved?
2186                         (ebx  & (1u<<5))) {  // AVX2
2187                         // At this point we're at least HSW.  Continue checking for SKX.
2188                         if ((xcr0 & (1u<< 5)) && // Opmasks state saved?
2189                             (xcr0 & (1u<< 6)) && // First 16 ZMM registers saved?
2190                             (xcr0 & (1u<< 7)) && // High 16 ZMM registers saved?
2191                             (ebx  & (1u<<16)) && // AVX512F
2192                             (ebx  & (1u<<17)) && // AVX512DQ
2193                             (ebx  & (1u<<28)) && // AVX512CD
2194                             (ebx  & (1u<<30)) && // AVX512BW
2195                             (ebx  & (1u<<31))) { // AVX512VL
2196                             return CpuType::SKX;
2197                         }
2198                         return CpuType::HSW;
2199                     }
2200                 }
2201                 return CpuType::None;
2202             }();
2203             return type;
2204         }
2205     #endif
2206 
2207 #endif
2208 
2209 typedef struct {
2210     Op          op;
2211     const void* arg;
2212 } OpAndArg;
2213 
select_curve_op(const skcms_Curve * curve,int channel)2214 static OpAndArg select_curve_op(const skcms_Curve* curve, int channel) {
2215     static const struct { Op sRGBish, PQish, HLGish, HLGinvish, table; } ops[] = {
2216         { Op_tf_r, Op_pq_r, Op_hlg_r, Op_hlginv_r, Op_table_r },
2217         { Op_tf_g, Op_pq_g, Op_hlg_g, Op_hlginv_g, Op_table_g },
2218         { Op_tf_b, Op_pq_b, Op_hlg_b, Op_hlginv_b, Op_table_b },
2219         { Op_tf_a, Op_pq_a, Op_hlg_a, Op_hlginv_a, Op_table_a },
2220     };
2221     const auto& op = ops[channel];
2222 
2223     if (curve->table_entries == 0) {
2224         const OpAndArg noop = { Op_load_a8/*doesn't matter*/, nullptr };
2225 
2226         const skcms_TransferFunction& tf = curve->parametric;
2227 
2228         if (tf.g == 1 && tf.a == 1 &&
2229             tf.b == 0 && tf.c == 0 && tf.d == 0 && tf.e == 0 && tf.f == 0) {
2230             return noop;
2231         }
2232 
2233         switch (classify(tf)) {
2234             case Bad:        return noop;
2235             case sRGBish:    return OpAndArg{op.sRGBish,   &tf};
2236             case PQish:      return OpAndArg{op.PQish,     &tf};
2237             case HLGish:     return OpAndArg{op.HLGish,    &tf};
2238             case HLGinvish:  return OpAndArg{op.HLGinvish, &tf};
2239         }
2240     }
2241     return OpAndArg{op.table, curve};
2242 }
2243 
bytes_per_pixel(skcms_PixelFormat fmt)2244 static size_t bytes_per_pixel(skcms_PixelFormat fmt) {
2245     switch (fmt >> 1) {   // ignore rgb/bgr
2246         case skcms_PixelFormat_A_8                >> 1: return  1;
2247         case skcms_PixelFormat_G_8                >> 1: return  1;
2248         case skcms_PixelFormat_RGBA_8888_Palette8 >> 1: return  1;
2249         case skcms_PixelFormat_ABGR_4444          >> 1: return  2;
2250         case skcms_PixelFormat_RGB_565            >> 1: return  2;
2251         case skcms_PixelFormat_RGB_888            >> 1: return  3;
2252         case skcms_PixelFormat_RGBA_8888          >> 1: return  4;
2253         case skcms_PixelFormat_RGBA_8888_sRGB     >> 1: return  4;
2254         case skcms_PixelFormat_RGBA_1010102       >> 1: return  4;
2255         case skcms_PixelFormat_RGB_161616LE       >> 1: return  6;
2256         case skcms_PixelFormat_RGBA_16161616LE    >> 1: return  8;
2257         case skcms_PixelFormat_RGB_161616BE       >> 1: return  6;
2258         case skcms_PixelFormat_RGBA_16161616BE    >> 1: return  8;
2259         case skcms_PixelFormat_RGB_hhh_Norm       >> 1: return  6;
2260         case skcms_PixelFormat_RGBA_hhhh_Norm     >> 1: return  8;
2261         case skcms_PixelFormat_RGB_hhh            >> 1: return  6;
2262         case skcms_PixelFormat_RGBA_hhhh          >> 1: return  8;
2263         case skcms_PixelFormat_RGB_fff            >> 1: return 12;
2264         case skcms_PixelFormat_RGBA_ffff          >> 1: return 16;
2265     }
2266     assert(false);
2267     return 0;
2268 }
2269 
prep_for_destination(const skcms_ICCProfile * profile,skcms_Matrix3x3 * fromXYZD50,skcms_TransferFunction * invR,skcms_TransferFunction * invG,skcms_TransferFunction * invB)2270 static bool prep_for_destination(const skcms_ICCProfile* profile,
2271                                  skcms_Matrix3x3* fromXYZD50,
2272                                  skcms_TransferFunction* invR,
2273                                  skcms_TransferFunction* invG,
2274                                  skcms_TransferFunction* invB) {
2275     // We only support destinations with parametric transfer functions
2276     // and with gamuts that can be transformed from XYZD50.
2277     return profile->has_trc
2278         && profile->has_toXYZD50
2279         && profile->trc[0].table_entries == 0
2280         && profile->trc[1].table_entries == 0
2281         && profile->trc[2].table_entries == 0
2282         && skcms_TransferFunction_invert(&profile->trc[0].parametric, invR)
2283         && skcms_TransferFunction_invert(&profile->trc[1].parametric, invG)
2284         && skcms_TransferFunction_invert(&profile->trc[2].parametric, invB)
2285         && skcms_Matrix3x3_invert(&profile->toXYZD50, fromXYZD50);
2286 }
2287 
skcms_Transform(const void * src,skcms_PixelFormat srcFmt,skcms_AlphaFormat srcAlpha,const skcms_ICCProfile * srcProfile,void * dst,skcms_PixelFormat dstFmt,skcms_AlphaFormat dstAlpha,const skcms_ICCProfile * dstProfile,size_t npixels)2288 bool skcms_Transform(const void*             src,
2289                      skcms_PixelFormat       srcFmt,
2290                      skcms_AlphaFormat       srcAlpha,
2291                      const skcms_ICCProfile* srcProfile,
2292                      void*                   dst,
2293                      skcms_PixelFormat       dstFmt,
2294                      skcms_AlphaFormat       dstAlpha,
2295                      const skcms_ICCProfile* dstProfile,
2296                      size_t                  npixels) {
2297     return skcms_TransformWithPalette(src, srcFmt, srcAlpha, srcProfile,
2298                                       dst, dstFmt, dstAlpha, dstProfile,
2299                                       npixels, nullptr);
2300 }
2301 
skcms_TransformWithPalette(const void * src,skcms_PixelFormat srcFmt,skcms_AlphaFormat srcAlpha,const skcms_ICCProfile * srcProfile,void * dst,skcms_PixelFormat dstFmt,skcms_AlphaFormat dstAlpha,const skcms_ICCProfile * dstProfile,size_t nz,const void * palette)2302 bool skcms_TransformWithPalette(const void*             src,
2303                                 skcms_PixelFormat       srcFmt,
2304                                 skcms_AlphaFormat       srcAlpha,
2305                                 const skcms_ICCProfile* srcProfile,
2306                                 void*                   dst,
2307                                 skcms_PixelFormat       dstFmt,
2308                                 skcms_AlphaFormat       dstAlpha,
2309                                 const skcms_ICCProfile* dstProfile,
2310                                 size_t                  nz,
2311                                 const void*             palette) {
2312     const size_t dst_bpp = bytes_per_pixel(dstFmt),
2313                  src_bpp = bytes_per_pixel(srcFmt);
2314     // Let's just refuse if the request is absurdly big.
2315     if (nz * dst_bpp > INT_MAX || nz * src_bpp > INT_MAX) {
2316         return false;
2317     }
2318     int n = (int)nz;
2319 
2320     // Null profiles default to sRGB. Passing null for both is handy when doing format conversion.
2321     if (!srcProfile) {
2322         srcProfile = skcms_sRGB_profile();
2323     }
2324     if (!dstProfile) {
2325         dstProfile = skcms_sRGB_profile();
2326     }
2327 
2328     // We can't transform in place unless the PixelFormats are the same size.
2329     if (dst == src && dst_bpp != src_bpp) {
2330         return false;
2331     }
2332     // TODO: more careful alias rejection (like, dst == src + 1)?
2333 
2334     if (needs_palette(srcFmt) && !palette) {
2335         return false;
2336     }
2337 
2338     Op          program  [32];
2339     const void* arguments[32];
2340 
2341     Op*          ops  = program;
2342     const void** args = arguments;
2343 
2344     // These are always parametric curves of some sort.
2345     skcms_Curve dst_curves[3];
2346     dst_curves[0].table_entries =
2347     dst_curves[1].table_entries =
2348     dst_curves[2].table_entries = 0;
2349 
2350     skcms_Matrix3x3        from_xyz;
2351 
2352     switch (srcFmt >> 1) {
2353         default: return false;
2354         case skcms_PixelFormat_A_8             >> 1: *ops++ = Op_load_a8;         break;
2355         case skcms_PixelFormat_G_8             >> 1: *ops++ = Op_load_g8;         break;
2356         case skcms_PixelFormat_ABGR_4444       >> 1: *ops++ = Op_load_4444;       break;
2357         case skcms_PixelFormat_RGB_565         >> 1: *ops++ = Op_load_565;        break;
2358         case skcms_PixelFormat_RGB_888         >> 1: *ops++ = Op_load_888;        break;
2359         case skcms_PixelFormat_RGBA_8888       >> 1: *ops++ = Op_load_8888;       break;
2360         case skcms_PixelFormat_RGBA_1010102    >> 1: *ops++ = Op_load_1010102;    break;
2361         case skcms_PixelFormat_RGB_161616LE    >> 1: *ops++ = Op_load_161616LE;   break;
2362         case skcms_PixelFormat_RGBA_16161616LE >> 1: *ops++ = Op_load_16161616LE; break;
2363         case skcms_PixelFormat_RGB_161616BE    >> 1: *ops++ = Op_load_161616BE;   break;
2364         case skcms_PixelFormat_RGBA_16161616BE >> 1: *ops++ = Op_load_16161616BE; break;
2365         case skcms_PixelFormat_RGB_hhh_Norm    >> 1: *ops++ = Op_load_hhh;        break;
2366         case skcms_PixelFormat_RGBA_hhhh_Norm  >> 1: *ops++ = Op_load_hhhh;       break;
2367         case skcms_PixelFormat_RGB_hhh         >> 1: *ops++ = Op_load_hhh;        break;
2368         case skcms_PixelFormat_RGBA_hhhh       >> 1: *ops++ = Op_load_hhhh;       break;
2369         case skcms_PixelFormat_RGB_fff         >> 1: *ops++ = Op_load_fff;        break;
2370         case skcms_PixelFormat_RGBA_ffff       >> 1: *ops++ = Op_load_ffff;       break;
2371 
2372         case skcms_PixelFormat_RGBA_8888_Palette8 >> 1: *ops++  = Op_load_8888_palette8;
2373                                                         *args++ = palette;
2374                                                         break;
2375         case skcms_PixelFormat_RGBA_8888_sRGB >> 1:
2376             *ops++ = Op_load_8888;
2377             *ops++ = Op_tf_r;       *args++ = skcms_sRGB_TransferFunction();
2378             *ops++ = Op_tf_g;       *args++ = skcms_sRGB_TransferFunction();
2379             *ops++ = Op_tf_b;       *args++ = skcms_sRGB_TransferFunction();
2380             break;
2381     }
2382     if (srcFmt == skcms_PixelFormat_RGB_hhh_Norm ||
2383         srcFmt == skcms_PixelFormat_RGBA_hhhh_Norm) {
2384         *ops++ = Op_clamp;
2385     }
2386     if (srcFmt & 1) {
2387         *ops++ = Op_swap_rb;
2388     }
2389     skcms_ICCProfile gray_dst_profile;
2390     if ((dstFmt >> 1) == (skcms_PixelFormat_G_8 >> 1)) {
2391         // When transforming to gray, stop at XYZ (by setting toXYZ to identity), then transform
2392         // luminance (Y) by the destination transfer function.
2393         gray_dst_profile = *dstProfile;
2394         skcms_SetXYZD50(&gray_dst_profile, &skcms_XYZD50_profile()->toXYZD50);
2395         dstProfile = &gray_dst_profile;
2396     }
2397 
2398     if (srcProfile->data_color_space == skcms_Signature_CMYK) {
2399         // Photoshop creates CMYK images as inverse CMYK.
2400         // These happen to be the only ones we've _ever_ seen.
2401         *ops++ = Op_invert;
2402         // With CMYK, ignore the alpha type, to avoid changing K or conflating CMY with K.
2403         srcAlpha = skcms_AlphaFormat_Unpremul;
2404     }
2405 
2406     if (srcAlpha == skcms_AlphaFormat_Opaque) {
2407         *ops++ = Op_force_opaque;
2408     } else if (srcAlpha == skcms_AlphaFormat_PremulAsEncoded) {
2409         *ops++ = Op_unpremul;
2410     }
2411 
2412     if (dstProfile != srcProfile) {
2413 
2414         if (!prep_for_destination(dstProfile,
2415                                   &from_xyz,
2416                                   &dst_curves[0].parametric,
2417                                   &dst_curves[1].parametric,
2418                                   &dst_curves[2].parametric)) {
2419             return false;
2420         }
2421 
2422         if (srcProfile->has_A2B) {
2423             if (srcProfile->A2B.input_channels) {
2424                 for (int i = 0; i < (int)srcProfile->A2B.input_channels; i++) {
2425                     OpAndArg oa = select_curve_op(&srcProfile->A2B.input_curves[i], i);
2426                     if (oa.arg) {
2427                         *ops++  = oa.op;
2428                         *args++ = oa.arg;
2429                     }
2430                 }
2431                 *ops++ = Op_clamp;
2432                 *ops++ = Op_clut;
2433                 *args++ = &srcProfile->A2B;
2434             }
2435 
2436             if (srcProfile->A2B.matrix_channels == 3) {
2437                 for (int i = 0; i < 3; i++) {
2438                     OpAndArg oa = select_curve_op(&srcProfile->A2B.matrix_curves[i], i);
2439                     if (oa.arg) {
2440                         *ops++  = oa.op;
2441                         *args++ = oa.arg;
2442                     }
2443                 }
2444 
2445                 static const skcms_Matrix3x4 I = {{
2446                     {1,0,0,0},
2447                     {0,1,0,0},
2448                     {0,0,1,0},
2449                 }};
2450                 if (0 != memcmp(&I, &srcProfile->A2B.matrix, sizeof(I))) {
2451                     *ops++  = Op_matrix_3x4;
2452                     *args++ = &srcProfile->A2B.matrix;
2453                 }
2454             }
2455 
2456             if (srcProfile->A2B.output_channels == 3) {
2457                 for (int i = 0; i < 3; i++) {
2458                     OpAndArg oa = select_curve_op(&srcProfile->A2B.output_curves[i], i);
2459                     if (oa.arg) {
2460                         *ops++  = oa.op;
2461                         *args++ = oa.arg;
2462                     }
2463                 }
2464             }
2465 
2466             if (srcProfile->pcs == skcms_Signature_Lab) {
2467                 *ops++ = Op_lab_to_xyz;
2468             }
2469 
2470         } else if (srcProfile->has_trc && srcProfile->has_toXYZD50) {
2471             for (int i = 0; i < 3; i++) {
2472                 OpAndArg oa = select_curve_op(&srcProfile->trc[i], i);
2473                 if (oa.arg) {
2474                     *ops++  = oa.op;
2475                     *args++ = oa.arg;
2476                 }
2477             }
2478         } else {
2479             return false;
2480         }
2481 
2482         // A2B sources should already be in XYZD50 at this point.
2483         // Others still need to be transformed using their toXYZD50 matrix.
2484         // N.B. There are profiles that contain both A2B tags and toXYZD50 matrices.
2485         // If we use the A2B tags, we need to ignore the XYZD50 matrix entirely.
2486         assert (srcProfile->has_A2B || srcProfile->has_toXYZD50);
2487         static const skcms_Matrix3x3 I = {{
2488             { 1.0f, 0.0f, 0.0f },
2489             { 0.0f, 1.0f, 0.0f },
2490             { 0.0f, 0.0f, 1.0f },
2491         }};
2492         const skcms_Matrix3x3* to_xyz = srcProfile->has_A2B ? &I : &srcProfile->toXYZD50;
2493 
2494         // There's a chance the source and destination gamuts are identical,
2495         // in which case we can skip the gamut transform.
2496         if (0 != memcmp(&dstProfile->toXYZD50, to_xyz, sizeof(skcms_Matrix3x3))) {
2497             // Concat the entire gamut transform into from_xyz,
2498             // now slightly misnamed but it's a handy spot to stash the result.
2499             from_xyz = skcms_Matrix3x3_concat(&from_xyz, to_xyz);
2500             *ops++  = Op_matrix_3x3;
2501             *args++ = &from_xyz;
2502         }
2503 
2504         // Encode back to dst RGB using its parametric transfer functions.
2505         for (int i = 0; i < 3; i++) {
2506             OpAndArg oa = select_curve_op(dst_curves+i, i);
2507             if (oa.arg) {
2508                 assert (oa.op != Op_table_r &&
2509                         oa.op != Op_table_g &&
2510                         oa.op != Op_table_b &&
2511                         oa.op != Op_table_a);
2512                 *ops++  = oa.op;
2513                 *args++ = oa.arg;
2514             }
2515         }
2516     }
2517 
2518     // Clamp here before premul to make sure we're clamping to normalized values _and_ gamut,
2519     // not just to values that fit in [0,1].
2520     //
2521     // E.g. r = 1.1, a = 0.5 would fit fine in fixed point after premul (ra=0.55,a=0.5),
2522     // but would be carrying r > 1, which is really unexpected for downstream consumers.
2523     if (dstFmt < skcms_PixelFormat_RGB_hhh) {
2524         *ops++ = Op_clamp;
2525     }
2526     if (dstAlpha == skcms_AlphaFormat_Opaque) {
2527         *ops++ = Op_force_opaque;
2528     } else if (dstAlpha == skcms_AlphaFormat_PremulAsEncoded) {
2529         *ops++ = Op_premul;
2530     }
2531     if (dstFmt & 1) {
2532         *ops++ = Op_swap_rb;
2533     }
2534     switch (dstFmt >> 1) {
2535         default: return false;
2536         case skcms_PixelFormat_A_8             >> 1: *ops++ = Op_store_a8;         break;
2537         case skcms_PixelFormat_G_8             >> 1: *ops++ = Op_store_g8;         break;
2538         case skcms_PixelFormat_ABGR_4444       >> 1: *ops++ = Op_store_4444;       break;
2539         case skcms_PixelFormat_RGB_565         >> 1: *ops++ = Op_store_565;        break;
2540         case skcms_PixelFormat_RGB_888         >> 1: *ops++ = Op_store_888;        break;
2541         case skcms_PixelFormat_RGBA_8888       >> 1: *ops++ = Op_store_8888;       break;
2542         case skcms_PixelFormat_RGBA_1010102    >> 1: *ops++ = Op_store_1010102;    break;
2543         case skcms_PixelFormat_RGB_161616LE    >> 1: *ops++ = Op_store_161616LE;   break;
2544         case skcms_PixelFormat_RGBA_16161616LE >> 1: *ops++ = Op_store_16161616LE; break;
2545         case skcms_PixelFormat_RGB_161616BE    >> 1: *ops++ = Op_store_161616BE;   break;
2546         case skcms_PixelFormat_RGBA_16161616BE >> 1: *ops++ = Op_store_16161616BE; break;
2547         case skcms_PixelFormat_RGB_hhh_Norm    >> 1: *ops++ = Op_store_hhh;        break;
2548         case skcms_PixelFormat_RGBA_hhhh_Norm  >> 1: *ops++ = Op_store_hhhh;       break;
2549         case skcms_PixelFormat_RGB_hhh         >> 1: *ops++ = Op_store_hhh;        break;
2550         case skcms_PixelFormat_RGBA_hhhh       >> 1: *ops++ = Op_store_hhhh;       break;
2551         case skcms_PixelFormat_RGB_fff         >> 1: *ops++ = Op_store_fff;        break;
2552         case skcms_PixelFormat_RGBA_ffff       >> 1: *ops++ = Op_store_ffff;       break;
2553 
2554         case skcms_PixelFormat_RGBA_8888_sRGB >> 1:
2555             *ops++ = Op_tf_r;       *args++ = skcms_sRGB_Inverse_TransferFunction();
2556             *ops++ = Op_tf_g;       *args++ = skcms_sRGB_Inverse_TransferFunction();
2557             *ops++ = Op_tf_b;       *args++ = skcms_sRGB_Inverse_TransferFunction();
2558             *ops++ = Op_store_8888;
2559             break;
2560     }
2561 
2562     auto run = baseline::run_program;
2563 #if defined(TEST_FOR_HSW)
2564     switch (cpu_type()) {
2565         case CpuType::None:                        break;
2566         case CpuType::HSW: run = hsw::run_program; break;
2567         case CpuType::SKX: run = hsw::run_program; break;
2568     }
2569 #endif
2570 #if defined(TEST_FOR_SKX)
2571     switch (cpu_type()) {
2572         case CpuType::None:                        break;
2573         case CpuType::HSW:                         break;
2574         case CpuType::SKX: run = skx::run_program; break;
2575     }
2576 #endif
2577     run(program, arguments, (const char*)src, (char*)dst, n, src_bpp,dst_bpp);
2578     return true;
2579 }
2580 
assert_usable_as_destination(const skcms_ICCProfile * profile)2581 static void assert_usable_as_destination(const skcms_ICCProfile* profile) {
2582 #if defined(NDEBUG)
2583     (void)profile;
2584 #else
2585     skcms_Matrix3x3 fromXYZD50;
2586     skcms_TransferFunction invR, invG, invB;
2587     assert(prep_for_destination(profile, &fromXYZD50, &invR, &invG, &invB));
2588 #endif
2589 }
2590 
skcms_MakeUsableAsDestination(skcms_ICCProfile * profile)2591 bool skcms_MakeUsableAsDestination(skcms_ICCProfile* profile) {
2592     skcms_Matrix3x3 fromXYZD50;
2593     if (!profile->has_trc || !profile->has_toXYZD50
2594         || !skcms_Matrix3x3_invert(&profile->toXYZD50, &fromXYZD50)) {
2595         return false;
2596     }
2597 
2598     skcms_TransferFunction tf[3];
2599     for (int i = 0; i < 3; i++) {
2600         skcms_TransferFunction inv;
2601         if (profile->trc[i].table_entries == 0
2602             && skcms_TransferFunction_invert(&profile->trc[i].parametric, &inv)) {
2603             tf[i] = profile->trc[i].parametric;
2604             continue;
2605         }
2606 
2607         float max_error;
2608         // Parametric curves from skcms_ApproximateCurve() are guaranteed to be invertible.
2609         if (!skcms_ApproximateCurve(&profile->trc[i], &tf[i], &max_error)) {
2610             return false;
2611         }
2612     }
2613 
2614     for (int i = 0; i < 3; ++i) {
2615         profile->trc[i].table_entries = 0;
2616         profile->trc[i].parametric = tf[i];
2617     }
2618 
2619     assert_usable_as_destination(profile);
2620     return true;
2621 }
2622 
skcms_MakeUsableAsDestinationWithSingleCurve(skcms_ICCProfile * profile)2623 bool skcms_MakeUsableAsDestinationWithSingleCurve(skcms_ICCProfile* profile) {
2624     // Operate on a copy of profile, so we can choose the best TF for the original curves
2625     skcms_ICCProfile result = *profile;
2626     if (!skcms_MakeUsableAsDestination(&result)) {
2627         return false;
2628     }
2629 
2630     int best_tf = 0;
2631     float min_max_error = INFINITY_;
2632     for (int i = 0; i < 3; i++) {
2633         skcms_TransferFunction inv;
2634         if (!skcms_TransferFunction_invert(&result.trc[i].parametric, &inv)) {
2635             return false;
2636         }
2637 
2638         float err = 0;
2639         for (int j = 0; j < 3; ++j) {
2640             err = fmaxf_(err, skcms_MaxRoundtripError(&profile->trc[j], &inv));
2641         }
2642         if (min_max_error > err) {
2643             min_max_error = err;
2644             best_tf = i;
2645         }
2646     }
2647 
2648     for (int i = 0; i < 3; i++) {
2649         result.trc[i].parametric = result.trc[best_tf].parametric;
2650     }
2651 
2652     *profile = result;
2653     assert_usable_as_destination(profile);
2654     return true;
2655 }
2656