1 /*
2  * Copyright 2018 Google Inc.
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #include "skcms.h"
9 #include "skcms_internal.h"
10 #include <assert.h>
11 #include <float.h>
12 #include <limits.h>
13 #include <stdlib.h>
14 #include <string.h>
15 
16 #if defined(__ARM_NEON)
17     #include <arm_neon.h>
18 #elif defined(__SSE__)
19     #include <immintrin.h>
20 
21     #if defined(__clang__)
22         // That #include <immintrin.h> is usually enough, but Clang's headers
23         // "helpfully" skip including the whole kitchen sink when _MSC_VER is
24         // defined, because lots of programs on Windows would include that and
25         // it'd be a lot slower.  But we want all those headers included so we
26         // can use their features after runtime checks later.
27         #include <smmintrin.h>
28         #include <avxintrin.h>
29         #include <avx2intrin.h>
30         #include <avx512fintrin.h>
31         #include <avx512dqintrin.h>
32     #endif
33 #else
34     #define SKCMS_PORTABLE
35 #endif
36 
37 // sizeof(x) will return size_t, which is 32-bit on some machines and 64-bit on others.
38 // We have better testing on 64-bit machines, so force 32-bit machines to behave like 64-bit.
39 //
40 // Please do not use sizeof() directly, and size_t only when required.
41 // (We have no way of enforcing these requests...)
42 #define SAFE_SIZEOF(x) ((uint64_t)sizeof(x))
43 
44 // Same sort of thing for _Layout structs with a variable sized array at the end (named "variable").
45 #define SAFE_FIXED_SIZE(type) ((uint64_t)offsetof(type, variable))
46 
47 static const union {
48     uint32_t bits;
49     float    f;
50 } inf_ = { 0x7f800000 };
51 #define INFINITY_ inf_.f
52 
53 #if defined(__clang__) || defined(__GNUC__)
54     #define small_memcpy __builtin_memcpy
55 #else
56     #define small_memcpy memcpy
57 #endif
58 
log2f_(float x)59 static float log2f_(float x) {
60     // The first approximation of log2(x) is its exponent 'e', minus 127.
61     int32_t bits;
62     small_memcpy(&bits, &x, sizeof(bits));
63 
64     float e = (float)bits * (1.0f / (1<<23));
65 
66     // If we use the mantissa too we can refine the error signficantly.
67     int32_t m_bits = (bits & 0x007fffff) | 0x3f000000;
68     float m;
69     small_memcpy(&m, &m_bits, sizeof(m));
70 
71     return (e - 124.225514990f
72               -   1.498030302f*m
73               -   1.725879990f/(0.3520887068f + m));
74 }
logf_(float x)75 static float logf_(float x) {
76     const float ln2 = 0.69314718f;
77     return ln2*log2f_(x);
78 }
79 
exp2f_(float x)80 static float exp2f_(float x) {
81     float fract = x - floorf_(x);
82 
83     float fbits = (1.0f * (1<<23)) * (x + 121.274057500f
84                                         -   1.490129070f*fract
85                                         +  27.728023300f/(4.84252568f - fract));
86 
87     // Before we cast fbits to int32_t, check for out of range values to pacify UBSAN.
88     // INT_MAX is not exactly representable as a float, so exclude it as effectively infinite.
89     // INT_MIN is a power of 2 and exactly representable as a float, so it's fine.
90     if (fbits >= (float)INT_MAX) {
91         return INFINITY_;
92     } else if (fbits < (float)INT_MIN) {
93         return -INFINITY_;
94     }
95 
96     int32_t bits = (int32_t)fbits;
97     small_memcpy(&x, &bits, sizeof(x));
98     return x;
99 }
100 
101 // Not static, as it's used by some test tools.
powf_(float x,float y)102 float powf_(float x, float y) {
103     assert (x >= 0);
104     return (x == 0) || (x == 1) ? x
105                                 : exp2f_(log2f_(x) * y);
106 }
107 
expf_(float x)108 static float expf_(float x) {
109     const float log2_e = 1.4426950408889634074f;
110     return exp2f_(log2_e * x);
111 }
112 
fmaxf_(float x,float y)113 static float fmaxf_(float x, float y) { return x > y ? x : y; }
fminf_(float x,float y)114 static float fminf_(float x, float y) { return x < y ? x : y; }
115 
isfinitef_(float x)116 static bool isfinitef_(float x) { return 0 == x*0; }
117 
minus_1_ulp(float x)118 static float minus_1_ulp(float x) {
119     int32_t bits;
120     memcpy(&bits, &x, sizeof(bits));
121     bits = bits - 1;
122     memcpy(&x, &bits, sizeof(bits));
123     return x;
124 }
125 
126 // Most transfer functions we work with are sRGBish.
127 // For exotic HDR transfer functions, we encode them using a tf.g that makes no sense,
128 // and repurpose the other fields to hold the parameters of the HDR functions.
129 enum TFKind { Bad, sRGBish, PQish, HLGish, HLGinvish };
130 struct TF_PQish  { float A,B,C,D,E,F; };
131 struct TF_HLGish { float R,G,a,b,c; };
132 
TFKind_marker(TFKind kind)133 static float TFKind_marker(TFKind kind) {
134     // We'd use different NaNs, but those aren't guaranteed to be preserved by WASM.
135     return -(float)kind;
136 }
137 
classify(const skcms_TransferFunction & tf,TF_PQish * pq=nullptr,TF_HLGish * hlg=nullptr)138 static TFKind classify(const skcms_TransferFunction& tf, TF_PQish*   pq = nullptr
139                                                        , TF_HLGish* hlg = nullptr) {
140     if (tf.g < 0 && (int)tf.g == tf.g) {
141         // TODO: sanity checks for PQ/HLG like we do for sRGBish.
142         switch (-(int)tf.g) {
143             case PQish:     if (pq ) { memcpy(pq , &tf.a, sizeof(*pq )); } return PQish;
144             case HLGish:    if (hlg) { memcpy(hlg, &tf.a, sizeof(*hlg)); } return HLGish;
145             case HLGinvish: if (hlg) { memcpy(hlg, &tf.a, sizeof(*hlg)); } return HLGinvish;
146         }
147         return Bad;
148     }
149 
150     // Basic sanity checks for sRGBish transfer functions.
151     if (isfinitef_(tf.a + tf.b + tf.c + tf.d + tf.e + tf.f + tf.g)
152             // a,c,d,g should be non-negative to make any sense.
153             && tf.a >= 0
154             && tf.c >= 0
155             && tf.d >= 0
156             && tf.g >= 0
157             // Raising a negative value to a fractional tf->g produces complex numbers.
158             && tf.a * tf.d + tf.b >= 0) {
159         return sRGBish;
160     }
161 
162     return Bad;
163 }
164 
skcms_TransferFunction_makePQish(skcms_TransferFunction * tf,float A,float B,float C,float D,float E,float F)165 bool skcms_TransferFunction_makePQish(skcms_TransferFunction* tf,
166                                       float A, float B, float C,
167                                       float D, float E, float F) {
168     *tf = { TFKind_marker(PQish), A,B,C,D,E,F };
169     assert(classify(*tf) == PQish);
170     return true;
171 }
172 
skcms_TransferFunction_makeHLGish(skcms_TransferFunction * tf,float R,float G,float a,float b,float c)173 bool skcms_TransferFunction_makeHLGish(skcms_TransferFunction* tf,
174                                        float R, float G,
175                                        float a, float b, float c) {
176     *tf = { TFKind_marker(HLGish), R,G, a,b,c, 0 };
177     assert(classify(*tf) == HLGish);
178     return true;
179 }
180 
skcms_TransferFunction_eval(const skcms_TransferFunction * tf,float x)181 float skcms_TransferFunction_eval(const skcms_TransferFunction* tf, float x) {
182     float sign = x < 0 ? -1.0f : 1.0f;
183     x *= sign;
184 
185     TF_PQish  pq;
186     TF_HLGish hlg;
187     switch (classify(*tf, &pq, &hlg)) {
188         case Bad:       break;
189 
190         case HLGish:    return sign * (x*hlg.R <= 1 ? powf_(x*hlg.R, hlg.G)
191                                                     : expf_((x-hlg.c)*hlg.a) + hlg.b);
192 
193         // skcms_TransferFunction_invert() inverts R, G, and a for HLGinvish so this math is fast.
194         case HLGinvish: return sign * (x <= 1 ? hlg.R * powf_(x, hlg.G)
195                                               : hlg.a * logf_(x - hlg.b) + hlg.c);
196 
197 
198         case sRGBish: return sign * (x < tf->d ?       tf->c * x + tf->f
199                                                : powf_(tf->a * x + tf->b, tf->g) + tf->e);
200 
201         case PQish: return sign * powf_(fmaxf_(pq.A + pq.B * powf_(x, pq.C), 0)
202                                             / (pq.D + pq.E * powf_(x, pq.C)),
203                                         pq.F);
204     }
205     return 0;
206 }
207 
208 
eval_curve(const skcms_Curve * curve,float x)209 static float eval_curve(const skcms_Curve* curve, float x) {
210     if (curve->table_entries == 0) {
211         return skcms_TransferFunction_eval(&curve->parametric, x);
212     }
213 
214     float ix = fmaxf_(0, fminf_(x, 1)) * (curve->table_entries - 1);
215     int   lo = (int)                   ix        ,
216           hi = (int)(float)minus_1_ulp(ix + 1.0f);
217     float t = ix - (float)lo;
218 
219     float l, h;
220     if (curve->table_8) {
221         l = curve->table_8[lo] * (1/255.0f);
222         h = curve->table_8[hi] * (1/255.0f);
223     } else {
224         uint16_t be_l, be_h;
225         memcpy(&be_l, curve->table_16 + 2*lo, 2);
226         memcpy(&be_h, curve->table_16 + 2*hi, 2);
227         uint16_t le_l = ((be_l << 8) | (be_l >> 8)) & 0xffff;
228         uint16_t le_h = ((be_h << 8) | (be_h >> 8)) & 0xffff;
229         l = le_l * (1/65535.0f);
230         h = le_h * (1/65535.0f);
231     }
232     return l + (h-l)*t;
233 }
234 
max_roundtrip_error(const skcms_Curve * curve,const skcms_TransferFunction * inv_tf)235 static float max_roundtrip_error(const skcms_Curve* curve, const skcms_TransferFunction* inv_tf) {
236     uint32_t N = curve->table_entries > 256 ? curve->table_entries : 256;
237     const float dx = 1.0f / (N - 1);
238     float err = 0;
239     for (uint32_t i = 0; i < N; i++) {
240         float x = i * dx,
241               y = eval_curve(curve, x);
242         err = fmaxf_(err, fabsf_(x - skcms_TransferFunction_eval(inv_tf, y)));
243     }
244     return err;
245 }
246 
skcms_AreApproximateInverses(const skcms_Curve * curve,const skcms_TransferFunction * inv_tf)247 bool skcms_AreApproximateInverses(const skcms_Curve* curve, const skcms_TransferFunction* inv_tf) {
248     return max_roundtrip_error(curve, inv_tf) < (1/512.0f);
249 }
250 
251 // Additional ICC signature values that are only used internally
252 enum {
253     // File signature
254     skcms_Signature_acsp = 0x61637370,
255 
256     // Tag signatures
257     skcms_Signature_rTRC = 0x72545243,
258     skcms_Signature_gTRC = 0x67545243,
259     skcms_Signature_bTRC = 0x62545243,
260     skcms_Signature_kTRC = 0x6B545243,
261 
262     skcms_Signature_rXYZ = 0x7258595A,
263     skcms_Signature_gXYZ = 0x6758595A,
264     skcms_Signature_bXYZ = 0x6258595A,
265 
266     skcms_Signature_A2B0 = 0x41324230,
267     skcms_Signature_A2B1 = 0x41324231,
268     skcms_Signature_mAB  = 0x6D414220,
269 
270     skcms_Signature_CHAD = 0x63686164,
271 
272     // Type signatures
273     skcms_Signature_curv = 0x63757276,
274     skcms_Signature_mft1 = 0x6D667431,
275     skcms_Signature_mft2 = 0x6D667432,
276     skcms_Signature_para = 0x70617261,
277     skcms_Signature_sf32 = 0x73663332,
278     // XYZ is also a PCS signature, so it's defined in skcms.h
279     // skcms_Signature_XYZ = 0x58595A20,
280 };
281 
read_big_u16(const uint8_t * ptr)282 static uint16_t read_big_u16(const uint8_t* ptr) {
283     uint16_t be;
284     memcpy(&be, ptr, sizeof(be));
285 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
286     return be;
287 #else
288     #if defined(_MSC_VER)
289     return _byteswap_ushort(be);
290     #else
291     return __builtin_bswap16(be);
292     #endif
293 #endif
294 }
295 
read_big_u32(const uint8_t * ptr)296 static uint32_t read_big_u32(const uint8_t* ptr) {
297     uint32_t be;
298     memcpy(&be, ptr, sizeof(be));
299 #if __BYTE_ORDER == __ORDER_BIG_ENDIAN__
300     return be;
301 #else
302     #if defined(_MSC_VER)
303     return _byteswap_ulong(be);
304     #else
305     return __builtin_bswap32(be);
306     #endif
307 #endif
308 }
309 
read_big_i32(const uint8_t * ptr)310 static int32_t read_big_i32(const uint8_t* ptr) {
311     return (int32_t)read_big_u32(ptr);
312 }
313 
read_big_fixed(const uint8_t * ptr)314 static float read_big_fixed(const uint8_t* ptr) {
315     return read_big_i32(ptr) * (1.0f / 65536.0f);
316 }
317 
318 // Maps to an in-memory profile so that fields line up to the locations specified
319 // in ICC.1:2010, section 7.2
320 typedef struct {
321     uint8_t size                [ 4];
322     uint8_t cmm_type            [ 4];
323     uint8_t version             [ 4];
324     uint8_t profile_class       [ 4];
325     uint8_t data_color_space    [ 4];
326     uint8_t pcs                 [ 4];
327     uint8_t creation_date_time  [12];
328     uint8_t signature           [ 4];
329     uint8_t platform            [ 4];
330     uint8_t flags               [ 4];
331     uint8_t device_manufacturer [ 4];
332     uint8_t device_model        [ 4];
333     uint8_t device_attributes   [ 8];
334     uint8_t rendering_intent    [ 4];
335     uint8_t illuminant_X        [ 4];
336     uint8_t illuminant_Y        [ 4];
337     uint8_t illuminant_Z        [ 4];
338     uint8_t creator             [ 4];
339     uint8_t profile_id          [16];
340     uint8_t reserved            [28];
341     uint8_t tag_count           [ 4]; // Technically not part of header, but required
342 } header_Layout;
343 
344 typedef struct {
345     uint8_t signature [4];
346     uint8_t offset    [4];
347     uint8_t size      [4];
348 } tag_Layout;
349 
get_tag_table(const skcms_ICCProfile * profile)350 static const tag_Layout* get_tag_table(const skcms_ICCProfile* profile) {
351     return (const tag_Layout*)(profile->buffer + SAFE_SIZEOF(header_Layout));
352 }
353 
354 // s15Fixed16ArrayType is technically variable sized, holding N values. However, the only valid
355 // use of the type is for the CHAD tag that stores exactly nine values.
356 typedef struct {
357     uint8_t type     [ 4];
358     uint8_t reserved [ 4];
359     uint8_t values   [36];
360 } sf32_Layout;
361 
skcms_GetCHAD(const skcms_ICCProfile * profile,skcms_Matrix3x3 * m)362 bool skcms_GetCHAD(const skcms_ICCProfile* profile, skcms_Matrix3x3* m) {
363     skcms_ICCTag tag;
364     if (!skcms_GetTagBySignature(profile, skcms_Signature_CHAD, &tag)) {
365         return false;
366     }
367 
368     if (tag.type != skcms_Signature_sf32 || tag.size < SAFE_SIZEOF(sf32_Layout)) {
369         return false;
370     }
371 
372     const sf32_Layout* sf32Tag = (const sf32_Layout*)tag.buf;
373     const uint8_t* values = sf32Tag->values;
374     for (int r = 0; r < 3; ++r)
375     for (int c = 0; c < 3; ++c, values += 4) {
376         m->vals[r][c] = read_big_fixed(values);
377     }
378     return true;
379 }
380 
381 // XYZType is technically variable sized, holding N XYZ triples. However, the only valid uses of
382 // the type are for tags/data that store exactly one triple.
383 typedef struct {
384     uint8_t type     [4];
385     uint8_t reserved [4];
386     uint8_t X        [4];
387     uint8_t Y        [4];
388     uint8_t Z        [4];
389 } XYZ_Layout;
390 
read_tag_xyz(const skcms_ICCTag * tag,float * x,float * y,float * z)391 static bool read_tag_xyz(const skcms_ICCTag* tag, float* x, float* y, float* z) {
392     if (tag->type != skcms_Signature_XYZ || tag->size < SAFE_SIZEOF(XYZ_Layout)) {
393         return false;
394     }
395 
396     const XYZ_Layout* xyzTag = (const XYZ_Layout*)tag->buf;
397 
398     *x = read_big_fixed(xyzTag->X);
399     *y = read_big_fixed(xyzTag->Y);
400     *z = read_big_fixed(xyzTag->Z);
401     return true;
402 }
403 
read_to_XYZD50(const skcms_ICCTag * rXYZ,const skcms_ICCTag * gXYZ,const skcms_ICCTag * bXYZ,skcms_Matrix3x3 * toXYZ)404 static bool read_to_XYZD50(const skcms_ICCTag* rXYZ, const skcms_ICCTag* gXYZ,
405                            const skcms_ICCTag* bXYZ, skcms_Matrix3x3* toXYZ) {
406     return read_tag_xyz(rXYZ, &toXYZ->vals[0][0], &toXYZ->vals[1][0], &toXYZ->vals[2][0]) &&
407            read_tag_xyz(gXYZ, &toXYZ->vals[0][1], &toXYZ->vals[1][1], &toXYZ->vals[2][1]) &&
408            read_tag_xyz(bXYZ, &toXYZ->vals[0][2], &toXYZ->vals[1][2], &toXYZ->vals[2][2]);
409 }
410 
411 typedef struct {
412     uint8_t type          [4];
413     uint8_t reserved_a    [4];
414     uint8_t function_type [2];
415     uint8_t reserved_b    [2];
416     uint8_t variable      [1/*variable*/];  // 1, 3, 4, 5, or 7 s15.16, depending on function_type
417 } para_Layout;
418 
read_curve_para(const uint8_t * buf,uint32_t size,skcms_Curve * curve,uint32_t * curve_size)419 static bool read_curve_para(const uint8_t* buf, uint32_t size,
420                             skcms_Curve* curve, uint32_t* curve_size) {
421     if (size < SAFE_FIXED_SIZE(para_Layout)) {
422         return false;
423     }
424 
425     const para_Layout* paraTag = (const para_Layout*)buf;
426 
427     enum { kG = 0, kGAB = 1, kGABC = 2, kGABCD = 3, kGABCDEF = 4 };
428     uint16_t function_type = read_big_u16(paraTag->function_type);
429     if (function_type > kGABCDEF) {
430         return false;
431     }
432 
433     static const uint32_t curve_bytes[] = { 4, 12, 16, 20, 28 };
434     if (size < SAFE_FIXED_SIZE(para_Layout) + curve_bytes[function_type]) {
435         return false;
436     }
437 
438     if (curve_size) {
439         *curve_size = SAFE_FIXED_SIZE(para_Layout) + curve_bytes[function_type];
440     }
441 
442     curve->table_entries = 0;
443     curve->parametric.a  = 1.0f;
444     curve->parametric.b  = 0.0f;
445     curve->parametric.c  = 0.0f;
446     curve->parametric.d  = 0.0f;
447     curve->parametric.e  = 0.0f;
448     curve->parametric.f  = 0.0f;
449     curve->parametric.g  = read_big_fixed(paraTag->variable);
450 
451     switch (function_type) {
452         case kGAB:
453             curve->parametric.a = read_big_fixed(paraTag->variable + 4);
454             curve->parametric.b = read_big_fixed(paraTag->variable + 8);
455             if (curve->parametric.a == 0) {
456                 return false;
457             }
458             curve->parametric.d = -curve->parametric.b / curve->parametric.a;
459             break;
460         case kGABC:
461             curve->parametric.a = read_big_fixed(paraTag->variable + 4);
462             curve->parametric.b = read_big_fixed(paraTag->variable + 8);
463             curve->parametric.e = read_big_fixed(paraTag->variable + 12);
464             if (curve->parametric.a == 0) {
465                 return false;
466             }
467             curve->parametric.d = -curve->parametric.b / curve->parametric.a;
468             curve->parametric.f = curve->parametric.e;
469             break;
470         case kGABCD:
471             curve->parametric.a = read_big_fixed(paraTag->variable + 4);
472             curve->parametric.b = read_big_fixed(paraTag->variable + 8);
473             curve->parametric.c = read_big_fixed(paraTag->variable + 12);
474             curve->parametric.d = read_big_fixed(paraTag->variable + 16);
475             break;
476         case kGABCDEF:
477             curve->parametric.a = read_big_fixed(paraTag->variable + 4);
478             curve->parametric.b = read_big_fixed(paraTag->variable + 8);
479             curve->parametric.c = read_big_fixed(paraTag->variable + 12);
480             curve->parametric.d = read_big_fixed(paraTag->variable + 16);
481             curve->parametric.e = read_big_fixed(paraTag->variable + 20);
482             curve->parametric.f = read_big_fixed(paraTag->variable + 24);
483             break;
484     }
485     return classify(curve->parametric) == sRGBish;
486 }
487 
488 typedef struct {
489     uint8_t type          [4];
490     uint8_t reserved      [4];
491     uint8_t value_count   [4];
492     uint8_t variable      [1/*variable*/];  // value_count, 8.8 if 1, uint16 (n*65535) if > 1
493 } curv_Layout;
494 
read_curve_curv(const uint8_t * buf,uint32_t size,skcms_Curve * curve,uint32_t * curve_size)495 static bool read_curve_curv(const uint8_t* buf, uint32_t size,
496                             skcms_Curve* curve, uint32_t* curve_size) {
497     if (size < SAFE_FIXED_SIZE(curv_Layout)) {
498         return false;
499     }
500 
501     const curv_Layout* curvTag = (const curv_Layout*)buf;
502 
503     uint32_t value_count = read_big_u32(curvTag->value_count);
504     if (size < SAFE_FIXED_SIZE(curv_Layout) + value_count * SAFE_SIZEOF(uint16_t)) {
505         return false;
506     }
507 
508     if (curve_size) {
509         *curve_size = SAFE_FIXED_SIZE(curv_Layout) + value_count * SAFE_SIZEOF(uint16_t);
510     }
511 
512     if (value_count < 2) {
513         curve->table_entries = 0;
514         curve->parametric.a  = 1.0f;
515         curve->parametric.b  = 0.0f;
516         curve->parametric.c  = 0.0f;
517         curve->parametric.d  = 0.0f;
518         curve->parametric.e  = 0.0f;
519         curve->parametric.f  = 0.0f;
520         if (value_count == 0) {
521             // Empty tables are a shorthand for an identity curve
522             curve->parametric.g = 1.0f;
523         } else {
524             // Single entry tables are a shorthand for simple gamma
525             curve->parametric.g = read_big_u16(curvTag->variable) * (1.0f / 256.0f);
526         }
527     } else {
528         curve->table_8       = nullptr;
529         curve->table_16      = curvTag->variable;
530         curve->table_entries = value_count;
531     }
532 
533     return true;
534 }
535 
536 // Parses both curveType and parametricCurveType data. Ensures that at most 'size' bytes are read.
537 // If curve_size is not nullptr, writes the number of bytes used by the curve in (*curve_size).
read_curve(const uint8_t * buf,uint32_t size,skcms_Curve * curve,uint32_t * curve_size)538 static bool read_curve(const uint8_t* buf, uint32_t size,
539                        skcms_Curve* curve, uint32_t* curve_size) {
540     if (!buf || size < 4 || !curve) {
541         return false;
542     }
543 
544     uint32_t type = read_big_u32(buf);
545     if (type == skcms_Signature_para) {
546         return read_curve_para(buf, size, curve, curve_size);
547     } else if (type == skcms_Signature_curv) {
548         return read_curve_curv(buf, size, curve, curve_size);
549     }
550 
551     return false;
552 }
553 
554 // mft1 and mft2 share a large chunk of data
555 typedef struct {
556     uint8_t type                 [ 4];
557     uint8_t reserved_a           [ 4];
558     uint8_t input_channels       [ 1];
559     uint8_t output_channels      [ 1];
560     uint8_t grid_points          [ 1];
561     uint8_t reserved_b           [ 1];
562     uint8_t matrix               [36];
563 } mft_CommonLayout;
564 
565 typedef struct {
566     mft_CommonLayout common      [1];
567 
568     uint8_t variable             [1/*variable*/];
569 } mft1_Layout;
570 
571 typedef struct {
572     mft_CommonLayout common      [1];
573 
574     uint8_t input_table_entries  [2];
575     uint8_t output_table_entries [2];
576     uint8_t variable             [1/*variable*/];
577 } mft2_Layout;
578 
read_mft_common(const mft_CommonLayout * mftTag,skcms_A2B * a2b)579 static bool read_mft_common(const mft_CommonLayout* mftTag, skcms_A2B* a2b) {
580     // MFT matrices are applied before the first set of curves, but must be identity unless the
581     // input is PCSXYZ. We don't support PCSXYZ profiles, so we ignore this matrix. Note that the
582     // matrix in skcms_A2B is applied later in the pipe, so supporting this would require another
583     // field/flag.
584     a2b->matrix_channels = 0;
585 
586     a2b->input_channels  = mftTag->input_channels[0];
587     a2b->output_channels = mftTag->output_channels[0];
588 
589     // We require exactly three (ie XYZ/Lab/RGB) output channels
590     if (a2b->output_channels != ARRAY_COUNT(a2b->output_curves)) {
591         return false;
592     }
593     // We require at least one, and no more than four (ie CMYK) input channels
594     if (a2b->input_channels < 1 || a2b->input_channels > ARRAY_COUNT(a2b->input_curves)) {
595         return false;
596     }
597 
598     for (uint32_t i = 0; i < a2b->input_channels; ++i) {
599         a2b->grid_points[i] = mftTag->grid_points[0];
600     }
601     // The grid only makes sense with at least two points along each axis
602     if (a2b->grid_points[0] < 2) {
603         return false;
604     }
605 
606     return true;
607 }
608 
init_a2b_tables(const uint8_t * table_base,uint64_t max_tables_len,uint32_t byte_width,uint32_t input_table_entries,uint32_t output_table_entries,skcms_A2B * a2b)609 static bool init_a2b_tables(const uint8_t* table_base, uint64_t max_tables_len, uint32_t byte_width,
610                             uint32_t input_table_entries, uint32_t output_table_entries,
611                             skcms_A2B* a2b) {
612     // byte_width is 1 or 2, [input|output]_table_entries are in [2, 4096], so no overflow
613     uint32_t byte_len_per_input_table  = input_table_entries * byte_width;
614     uint32_t byte_len_per_output_table = output_table_entries * byte_width;
615 
616     // [input|output]_channels are <= 4, so still no overflow
617     uint32_t byte_len_all_input_tables  = a2b->input_channels * byte_len_per_input_table;
618     uint32_t byte_len_all_output_tables = a2b->output_channels * byte_len_per_output_table;
619 
620     uint64_t grid_size = a2b->output_channels * byte_width;
621     for (uint32_t axis = 0; axis < a2b->input_channels; ++axis) {
622         grid_size *= a2b->grid_points[axis];
623     }
624 
625     if (max_tables_len < byte_len_all_input_tables + grid_size + byte_len_all_output_tables) {
626         return false;
627     }
628 
629     for (uint32_t i = 0; i < a2b->input_channels; ++i) {
630         a2b->input_curves[i].table_entries = input_table_entries;
631         if (byte_width == 1) {
632             a2b->input_curves[i].table_8  = table_base + i * byte_len_per_input_table;
633             a2b->input_curves[i].table_16 = nullptr;
634         } else {
635             a2b->input_curves[i].table_8  = nullptr;
636             a2b->input_curves[i].table_16 = table_base + i * byte_len_per_input_table;
637         }
638     }
639 
640     if (byte_width == 1) {
641         a2b->grid_8  = table_base + byte_len_all_input_tables;
642         a2b->grid_16 = nullptr;
643     } else {
644         a2b->grid_8  = nullptr;
645         a2b->grid_16 = table_base + byte_len_all_input_tables;
646     }
647 
648     const uint8_t* output_table_base = table_base + byte_len_all_input_tables + grid_size;
649     for (uint32_t i = 0; i < a2b->output_channels; ++i) {
650         a2b->output_curves[i].table_entries = output_table_entries;
651         if (byte_width == 1) {
652             a2b->output_curves[i].table_8  = output_table_base + i * byte_len_per_output_table;
653             a2b->output_curves[i].table_16 = nullptr;
654         } else {
655             a2b->output_curves[i].table_8  = nullptr;
656             a2b->output_curves[i].table_16 = output_table_base + i * byte_len_per_output_table;
657         }
658     }
659 
660     return true;
661 }
662 
read_tag_mft1(const skcms_ICCTag * tag,skcms_A2B * a2b)663 static bool read_tag_mft1(const skcms_ICCTag* tag, skcms_A2B* a2b) {
664     if (tag->size < SAFE_FIXED_SIZE(mft1_Layout)) {
665         return false;
666     }
667 
668     const mft1_Layout* mftTag = (const mft1_Layout*)tag->buf;
669     if (!read_mft_common(mftTag->common, a2b)) {
670         return false;
671     }
672 
673     uint32_t input_table_entries  = 256;
674     uint32_t output_table_entries = 256;
675 
676     return init_a2b_tables(mftTag->variable, tag->size - SAFE_FIXED_SIZE(mft1_Layout), 1,
677                            input_table_entries, output_table_entries, a2b);
678 }
679 
read_tag_mft2(const skcms_ICCTag * tag,skcms_A2B * a2b)680 static bool read_tag_mft2(const skcms_ICCTag* tag, skcms_A2B* a2b) {
681     if (tag->size < SAFE_FIXED_SIZE(mft2_Layout)) {
682         return false;
683     }
684 
685     const mft2_Layout* mftTag = (const mft2_Layout*)tag->buf;
686     if (!read_mft_common(mftTag->common, a2b)) {
687         return false;
688     }
689 
690     uint32_t input_table_entries = read_big_u16(mftTag->input_table_entries);
691     uint32_t output_table_entries = read_big_u16(mftTag->output_table_entries);
692 
693     // ICC spec mandates that 2 <= table_entries <= 4096
694     if (input_table_entries < 2 || input_table_entries > 4096 ||
695         output_table_entries < 2 || output_table_entries > 4096) {
696         return false;
697     }
698 
699     return init_a2b_tables(mftTag->variable, tag->size - SAFE_FIXED_SIZE(mft2_Layout), 2,
700                            input_table_entries, output_table_entries, a2b);
701 }
702 
read_curves(const uint8_t * buf,uint32_t size,uint32_t curve_offset,uint32_t num_curves,skcms_Curve * curves)703 static bool read_curves(const uint8_t* buf, uint32_t size, uint32_t curve_offset,
704                         uint32_t num_curves, skcms_Curve* curves) {
705     for (uint32_t i = 0; i < num_curves; ++i) {
706         if (curve_offset > size) {
707             return false;
708         }
709 
710         uint32_t curve_bytes;
711         if (!read_curve(buf + curve_offset, size - curve_offset, &curves[i], &curve_bytes)) {
712             return false;
713         }
714 
715         if (curve_bytes > UINT32_MAX - 3) {
716             return false;
717         }
718         curve_bytes = (curve_bytes + 3) & ~3U;
719 
720         uint64_t new_offset_64 = (uint64_t)curve_offset + curve_bytes;
721         curve_offset = (uint32_t)new_offset_64;
722         if (new_offset_64 != curve_offset) {
723             return false;
724         }
725     }
726 
727     return true;
728 }
729 
730 typedef struct {
731     uint8_t type                 [ 4];
732     uint8_t reserved_a           [ 4];
733     uint8_t input_channels       [ 1];
734     uint8_t output_channels      [ 1];
735     uint8_t reserved_b           [ 2];
736     uint8_t b_curve_offset       [ 4];
737     uint8_t matrix_offset        [ 4];
738     uint8_t m_curve_offset       [ 4];
739     uint8_t clut_offset          [ 4];
740     uint8_t a_curve_offset       [ 4];
741 } mAB_Layout;
742 
743 typedef struct {
744     uint8_t grid_points          [16];
745     uint8_t grid_byte_width      [ 1];
746     uint8_t reserved             [ 3];
747     uint8_t variable             [1/*variable*/];
748 } mABCLUT_Layout;
749 
read_tag_mab(const skcms_ICCTag * tag,skcms_A2B * a2b,bool pcs_is_xyz)750 static bool read_tag_mab(const skcms_ICCTag* tag, skcms_A2B* a2b, bool pcs_is_xyz) {
751     if (tag->size < SAFE_SIZEOF(mAB_Layout)) {
752         return false;
753     }
754 
755     const mAB_Layout* mABTag = (const mAB_Layout*)tag->buf;
756 
757     a2b->input_channels  = mABTag->input_channels[0];
758     a2b->output_channels = mABTag->output_channels[0];
759 
760     // We require exactly three (ie XYZ/Lab/RGB) output channels
761     if (a2b->output_channels != ARRAY_COUNT(a2b->output_curves)) {
762         return false;
763     }
764     // We require no more than four (ie CMYK) input channels
765     if (a2b->input_channels > ARRAY_COUNT(a2b->input_curves)) {
766         return false;
767     }
768 
769     uint32_t b_curve_offset = read_big_u32(mABTag->b_curve_offset);
770     uint32_t matrix_offset  = read_big_u32(mABTag->matrix_offset);
771     uint32_t m_curve_offset = read_big_u32(mABTag->m_curve_offset);
772     uint32_t clut_offset    = read_big_u32(mABTag->clut_offset);
773     uint32_t a_curve_offset = read_big_u32(mABTag->a_curve_offset);
774 
775     // "B" curves must be present
776     if (0 == b_curve_offset) {
777         return false;
778     }
779 
780     if (!read_curves(tag->buf, tag->size, b_curve_offset, a2b->output_channels,
781                      a2b->output_curves)) {
782         return false;
783     }
784 
785     // "M" curves and Matrix must be used together
786     if (0 != m_curve_offset) {
787         if (0 == matrix_offset) {
788             return false;
789         }
790         a2b->matrix_channels = a2b->output_channels;
791         if (!read_curves(tag->buf, tag->size, m_curve_offset, a2b->matrix_channels,
792                          a2b->matrix_curves)) {
793             return false;
794         }
795 
796         // Read matrix, which is stored as a row-major 3x3, followed by the fourth column
797         if (tag->size < matrix_offset + 12 * SAFE_SIZEOF(uint32_t)) {
798             return false;
799         }
800         float encoding_factor = pcs_is_xyz ? 65535 / 32768.0f : 1.0f;
801         const uint8_t* mtx_buf = tag->buf + matrix_offset;
802         a2b->matrix.vals[0][0] = encoding_factor * read_big_fixed(mtx_buf + 0);
803         a2b->matrix.vals[0][1] = encoding_factor * read_big_fixed(mtx_buf + 4);
804         a2b->matrix.vals[0][2] = encoding_factor * read_big_fixed(mtx_buf + 8);
805         a2b->matrix.vals[1][0] = encoding_factor * read_big_fixed(mtx_buf + 12);
806         a2b->matrix.vals[1][1] = encoding_factor * read_big_fixed(mtx_buf + 16);
807         a2b->matrix.vals[1][2] = encoding_factor * read_big_fixed(mtx_buf + 20);
808         a2b->matrix.vals[2][0] = encoding_factor * read_big_fixed(mtx_buf + 24);
809         a2b->matrix.vals[2][1] = encoding_factor * read_big_fixed(mtx_buf + 28);
810         a2b->matrix.vals[2][2] = encoding_factor * read_big_fixed(mtx_buf + 32);
811         a2b->matrix.vals[0][3] = encoding_factor * read_big_fixed(mtx_buf + 36);
812         a2b->matrix.vals[1][3] = encoding_factor * read_big_fixed(mtx_buf + 40);
813         a2b->matrix.vals[2][3] = encoding_factor * read_big_fixed(mtx_buf + 44);
814     } else {
815         if (0 != matrix_offset) {
816             return false;
817         }
818         a2b->matrix_channels = 0;
819     }
820 
821     // "A" curves and CLUT must be used together
822     if (0 != a_curve_offset) {
823         if (0 == clut_offset) {
824             return false;
825         }
826         if (!read_curves(tag->buf, tag->size, a_curve_offset, a2b->input_channels,
827                          a2b->input_curves)) {
828             return false;
829         }
830 
831         if (tag->size < clut_offset + SAFE_FIXED_SIZE(mABCLUT_Layout)) {
832             return false;
833         }
834         const mABCLUT_Layout* clut = (const mABCLUT_Layout*)(tag->buf + clut_offset);
835 
836         if (clut->grid_byte_width[0] == 1) {
837             a2b->grid_8  = clut->variable;
838             a2b->grid_16 = nullptr;
839         } else if (clut->grid_byte_width[0] == 2) {
840             a2b->grid_8  = nullptr;
841             a2b->grid_16 = clut->variable;
842         } else {
843             return false;
844         }
845 
846         uint64_t grid_size = a2b->output_channels * clut->grid_byte_width[0];
847         for (uint32_t i = 0; i < a2b->input_channels; ++i) {
848             a2b->grid_points[i] = clut->grid_points[i];
849             // The grid only makes sense with at least two points along each axis
850             if (a2b->grid_points[i] < 2) {
851                 return false;
852             }
853             grid_size *= a2b->grid_points[i];
854         }
855         if (tag->size < clut_offset + SAFE_FIXED_SIZE(mABCLUT_Layout) + grid_size) {
856             return false;
857         }
858     } else {
859         if (0 != clut_offset) {
860             return false;
861         }
862 
863         // If there is no CLUT, the number of input and output channels must match
864         if (a2b->input_channels != a2b->output_channels) {
865             return false;
866         }
867 
868         // Zero out the number of input channels to signal that we're skipping this stage
869         a2b->input_channels = 0;
870     }
871 
872     return true;
873 }
874 
875 // If you pass f, we'll fit a possibly-non-zero value for *f.
876 // If you pass nullptr, we'll assume you want *f to be treated as zero.
fit_linear(const skcms_Curve * curve,int N,float tol,float * c,float * d,float * f=nullptr)877 static int fit_linear(const skcms_Curve* curve, int N, float tol,
878                       float* c, float* d, float* f = nullptr) {
879     assert(N > 1);
880     // We iteratively fit the first points to the TF's linear piece.
881     // We want the cx + f line to pass through the first and last points we fit exactly.
882     //
883     // As we walk along the points we find the minimum and maximum slope of the line before the
884     // error would exceed our tolerance.  We stop when the range [slope_min, slope_max] becomes
885     // emtpy, when we definitely can't add any more points.
886     //
887     // Some points' error intervals may intersect the running interval but not lie fully
888     // within it.  So we keep track of the last point we saw that is a valid end point candidate,
889     // and once the search is done, back up to build the line through *that* point.
890     const float dx = 1.0f / (N - 1);
891 
892     int lin_points = 1;
893 
894     float f_zero = 0.0f;
895     if (f) {
896         *f = eval_curve(curve, 0);
897     } else {
898         f = &f_zero;
899     }
900 
901 
902     float slope_min = -INFINITY_;
903     float slope_max = +INFINITY_;
904     for (int i = 1; i < N; ++i) {
905         float x = i * dx;
906         float y = eval_curve(curve, x);
907 
908         float slope_max_i = (y + tol - *f) / x,
909               slope_min_i = (y - tol - *f) / x;
910         if (slope_max_i < slope_min || slope_max < slope_min_i) {
911             // Slope intervals would no longer overlap.
912             break;
913         }
914         slope_max = fminf_(slope_max, slope_max_i);
915         slope_min = fmaxf_(slope_min, slope_min_i);
916 
917         float cur_slope = (y - *f) / x;
918         if (slope_min <= cur_slope && cur_slope <= slope_max) {
919             lin_points = i + 1;
920             *c = cur_slope;
921         }
922     }
923 
924     // Set D to the last point that met our tolerance.
925     *d = (lin_points - 1) * dx;
926     return lin_points;
927 }
928 
read_a2b(const skcms_ICCTag * tag,skcms_A2B * a2b,bool pcs_is_xyz)929 static bool read_a2b(const skcms_ICCTag* tag, skcms_A2B* a2b, bool pcs_is_xyz) {
930     bool ok = false;
931     if (tag->type == skcms_Signature_mft1) {
932         ok = read_tag_mft1(tag, a2b);
933     } else if (tag->type == skcms_Signature_mft2) {
934         ok = read_tag_mft2(tag, a2b);
935     } else if (tag->type == skcms_Signature_mAB) {
936         ok = read_tag_mab(tag, a2b, pcs_is_xyz);
937     }
938     if (!ok) {
939         return false;
940     }
941 
942     // Detect and canonicalize identity tables.
943     skcms_Curve* curves[] = {
944         a2b->input_channels  > 0 ? a2b->input_curves  + 0 : nullptr,
945         a2b->input_channels  > 1 ? a2b->input_curves  + 1 : nullptr,
946         a2b->input_channels  > 2 ? a2b->input_curves  + 2 : nullptr,
947         a2b->input_channels  > 3 ? a2b->input_curves  + 3 : nullptr,
948         a2b->matrix_channels > 0 ? a2b->matrix_curves + 0 : nullptr,
949         a2b->matrix_channels > 1 ? a2b->matrix_curves + 1 : nullptr,
950         a2b->matrix_channels > 2 ? a2b->matrix_curves + 2 : nullptr,
951         a2b->output_channels > 0 ? a2b->output_curves + 0 : nullptr,
952         a2b->output_channels > 1 ? a2b->output_curves + 1 : nullptr,
953         a2b->output_channels > 2 ? a2b->output_curves + 2 : nullptr,
954     };
955 
956     for (int i = 0; i < ARRAY_COUNT(curves); i++) {
957         skcms_Curve* curve = curves[i];
958 
959         if (curve && curve->table_entries && curve->table_entries <= (uint32_t)INT_MAX) {
960             int N = (int)curve->table_entries;
961 
962             float c = 0.0f, d = 0.0f, f = 0.0f;
963             if (N == fit_linear(curve, N, 1.0f/(2*N), &c,&d,&f)
964                 && c == 1.0f
965                 && f == 0.0f) {
966                 curve->table_entries = 0;
967                 curve->table_8       = nullptr;
968                 curve->table_16      = nullptr;
969                 curve->parametric    = skcms_TransferFunction{1,1,0,0,0,0,0};
970             }
971         }
972     }
973 
974     return true;
975 }
976 
skcms_GetTagByIndex(const skcms_ICCProfile * profile,uint32_t idx,skcms_ICCTag * tag)977 void skcms_GetTagByIndex(const skcms_ICCProfile* profile, uint32_t idx, skcms_ICCTag* tag) {
978     if (!profile || !profile->buffer || !tag) { return; }
979     if (idx > profile->tag_count) { return; }
980     const tag_Layout* tags = get_tag_table(profile);
981     tag->signature = read_big_u32(tags[idx].signature);
982     tag->size      = read_big_u32(tags[idx].size);
983     tag->buf       = read_big_u32(tags[idx].offset) + profile->buffer;
984     tag->type      = read_big_u32(tag->buf);
985 }
986 
skcms_GetTagBySignature(const skcms_ICCProfile * profile,uint32_t sig,skcms_ICCTag * tag)987 bool skcms_GetTagBySignature(const skcms_ICCProfile* profile, uint32_t sig, skcms_ICCTag* tag) {
988     if (!profile || !profile->buffer || !tag) { return false; }
989     const tag_Layout* tags = get_tag_table(profile);
990     for (uint32_t i = 0; i < profile->tag_count; ++i) {
991         if (read_big_u32(tags[i].signature) == sig) {
992             tag->signature = sig;
993             tag->size      = read_big_u32(tags[i].size);
994             tag->buf       = read_big_u32(tags[i].offset) + profile->buffer;
995             tag->type      = read_big_u32(tag->buf);
996             return true;
997         }
998     }
999     return false;
1000 }
1001 
usable_as_src(const skcms_ICCProfile * profile)1002 static bool usable_as_src(const skcms_ICCProfile* profile) {
1003     return profile->has_A2B
1004        || (profile->has_trc && profile->has_toXYZD50);
1005 }
1006 
skcms_Parse(const void * buf,size_t len,skcms_ICCProfile * profile)1007 bool skcms_Parse(const void* buf, size_t len, skcms_ICCProfile* profile) {
1008     assert(SAFE_SIZEOF(header_Layout) == 132);
1009 
1010     if (!profile) {
1011         return false;
1012     }
1013     memset(profile, 0, SAFE_SIZEOF(*profile));
1014 
1015     if (len < SAFE_SIZEOF(header_Layout)) {
1016         return false;
1017     }
1018 
1019     // Byte-swap all header fields
1020     const header_Layout* header  = (const header_Layout*)buf;
1021     profile->buffer              = (const uint8_t*)buf;
1022     profile->size                = read_big_u32(header->size);
1023     uint32_t version             = read_big_u32(header->version);
1024     profile->data_color_space    = read_big_u32(header->data_color_space);
1025     profile->pcs                 = read_big_u32(header->pcs);
1026     uint32_t signature           = read_big_u32(header->signature);
1027     float illuminant_X           = read_big_fixed(header->illuminant_X);
1028     float illuminant_Y           = read_big_fixed(header->illuminant_Y);
1029     float illuminant_Z           = read_big_fixed(header->illuminant_Z);
1030     profile->tag_count           = read_big_u32(header->tag_count);
1031 
1032     // Validate signature, size (smaller than buffer, large enough to hold tag table),
1033     // and major version
1034     uint64_t tag_table_size = profile->tag_count * SAFE_SIZEOF(tag_Layout);
1035     if (signature != skcms_Signature_acsp ||
1036         profile->size > len ||
1037         profile->size < SAFE_SIZEOF(header_Layout) + tag_table_size ||
1038         (version >> 24) > 4) {
1039         return false;
1040     }
1041 
1042     // Validate that illuminant is D50 white
1043     if (fabsf_(illuminant_X - 0.9642f) > 0.0100f ||
1044         fabsf_(illuminant_Y - 1.0000f) > 0.0100f ||
1045         fabsf_(illuminant_Z - 0.8249f) > 0.0100f) {
1046         return false;
1047     }
1048 
1049     // Validate that all tag entries have sane offset + size
1050     const tag_Layout* tags = get_tag_table(profile);
1051     for (uint32_t i = 0; i < profile->tag_count; ++i) {
1052         uint32_t tag_offset = read_big_u32(tags[i].offset);
1053         uint32_t tag_size   = read_big_u32(tags[i].size);
1054         uint64_t tag_end    = (uint64_t)tag_offset + (uint64_t)tag_size;
1055         if (tag_size < 4 || tag_end > profile->size) {
1056             return false;
1057         }
1058     }
1059 
1060     if (profile->pcs != skcms_Signature_XYZ && profile->pcs != skcms_Signature_Lab) {
1061         return false;
1062     }
1063 
1064     bool pcs_is_xyz = profile->pcs == skcms_Signature_XYZ;
1065 
1066     // Pre-parse commonly used tags.
1067     skcms_ICCTag kTRC;
1068     if (profile->data_color_space == skcms_Signature_Gray &&
1069         skcms_GetTagBySignature(profile, skcms_Signature_kTRC, &kTRC)) {
1070         if (!read_curve(kTRC.buf, kTRC.size, &profile->trc[0], nullptr)) {
1071             // Malformed tag
1072             return false;
1073         }
1074         profile->trc[1] = profile->trc[0];
1075         profile->trc[2] = profile->trc[0];
1076         profile->has_trc = true;
1077 
1078         if (pcs_is_xyz) {
1079             profile->toXYZD50.vals[0][0] = illuminant_X;
1080             profile->toXYZD50.vals[1][1] = illuminant_Y;
1081             profile->toXYZD50.vals[2][2] = illuminant_Z;
1082             profile->has_toXYZD50 = true;
1083         }
1084     } else {
1085         skcms_ICCTag rTRC, gTRC, bTRC;
1086         if (skcms_GetTagBySignature(profile, skcms_Signature_rTRC, &rTRC) &&
1087             skcms_GetTagBySignature(profile, skcms_Signature_gTRC, &gTRC) &&
1088             skcms_GetTagBySignature(profile, skcms_Signature_bTRC, &bTRC)) {
1089             if (!read_curve(rTRC.buf, rTRC.size, &profile->trc[0], nullptr) ||
1090                 !read_curve(gTRC.buf, gTRC.size, &profile->trc[1], nullptr) ||
1091                 !read_curve(bTRC.buf, bTRC.size, &profile->trc[2], nullptr)) {
1092                 // Malformed TRC tags
1093                 return false;
1094             }
1095             profile->has_trc = true;
1096         }
1097 
1098         skcms_ICCTag rXYZ, gXYZ, bXYZ;
1099         if (skcms_GetTagBySignature(profile, skcms_Signature_rXYZ, &rXYZ) &&
1100             skcms_GetTagBySignature(profile, skcms_Signature_gXYZ, &gXYZ) &&
1101             skcms_GetTagBySignature(profile, skcms_Signature_bXYZ, &bXYZ)) {
1102             if (!read_to_XYZD50(&rXYZ, &gXYZ, &bXYZ, &profile->toXYZD50)) {
1103                 // Malformed XYZ tags
1104                 return false;
1105             }
1106             profile->has_toXYZD50 = true;
1107         }
1108     }
1109 
1110     skcms_ICCTag a2b_tag;
1111 
1112     // For now, we're preferring A2B0, like Skia does and the ICC spec tells us to.
1113     // TODO: prefer A2B1 (relative colormetric) over A2B0 (perceptual)?
1114     // This breaks with the ICC spec, but we think it's a good idea, given that TRC curves
1115     // and all our known users are thinking exclusively in terms of relative colormetric.
1116     const uint32_t sigs[] = { skcms_Signature_A2B0, skcms_Signature_A2B1 };
1117     for (int i = 0; i < ARRAY_COUNT(sigs); i++) {
1118         if (skcms_GetTagBySignature(profile, sigs[i], &a2b_tag)) {
1119             if (!read_a2b(&a2b_tag, &profile->A2B, pcs_is_xyz)) {
1120                 // Malformed A2B tag
1121                 return false;
1122             }
1123             profile->has_A2B = true;
1124             break;
1125         }
1126     }
1127 
1128     return usable_as_src(profile);
1129 }
1130 
1131 
skcms_sRGB_profile()1132 const skcms_ICCProfile* skcms_sRGB_profile() {
1133     static const skcms_ICCProfile sRGB_profile = {
1134         nullptr,               // buffer, moot here
1135 
1136         0,                     // size, moot here
1137         skcms_Signature_RGB,   // data_color_space
1138         skcms_Signature_XYZ,   // pcs
1139         0,                     // tag count, moot here
1140 
1141         // We choose to represent sRGB with its canonical transfer function,
1142         // and with its canonical XYZD50 gamut matrix.
1143         true,  // has_trc, followed by the 3 trc curves
1144         {
1145             {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
1146             {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
1147             {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
1148         },
1149 
1150         true,  // has_toXYZD50, followed by 3x3 toXYZD50 matrix
1151         {{
1152             { 0.436065674f, 0.385147095f, 0.143066406f },
1153             { 0.222488403f, 0.716873169f, 0.060607910f },
1154             { 0.013916016f, 0.097076416f, 0.714096069f },
1155         }},
1156 
1157         false, // has_A2B, followed by a2b itself which we don't care about.
1158         {
1159             0,
1160             {
1161                 {{0, {0,0, 0,0,0,0,0}}},
1162                 {{0, {0,0, 0,0,0,0,0}}},
1163                 {{0, {0,0, 0,0,0,0,0}}},
1164                 {{0, {0,0, 0,0,0,0,0}}},
1165             },
1166             {0,0,0,0},
1167             nullptr,
1168             nullptr,
1169 
1170             0,
1171             {
1172                 {{0, {0,0, 0,0,0,0,0}}},
1173                 {{0, {0,0, 0,0,0,0,0}}},
1174                 {{0, {0,0, 0,0,0,0,0}}},
1175             },
1176             {{
1177                 { 0,0,0,0 },
1178                 { 0,0,0,0 },
1179                 { 0,0,0,0 },
1180             }},
1181 
1182             0,
1183             {
1184                 {{0, {0,0, 0,0,0,0,0}}},
1185                 {{0, {0,0, 0,0,0,0,0}}},
1186                 {{0, {0,0, 0,0,0,0,0}}},
1187             },
1188         },
1189     };
1190     return &sRGB_profile;
1191 }
1192 
skcms_XYZD50_profile()1193 const skcms_ICCProfile* skcms_XYZD50_profile() {
1194     // Just like sRGB above, but with identity transfer functions and toXYZD50 matrix.
1195     static const skcms_ICCProfile XYZD50_profile = {
1196         nullptr,               // buffer, moot here
1197 
1198         0,                     // size, moot here
1199         skcms_Signature_RGB,   // data_color_space
1200         skcms_Signature_XYZ,   // pcs
1201         0,                     // tag count, moot here
1202 
1203         true,  // has_trc, followed by the 3 trc curves
1204         {
1205             {{0, {1,1, 0,0,0,0,0}}},
1206             {{0, {1,1, 0,0,0,0,0}}},
1207             {{0, {1,1, 0,0,0,0,0}}},
1208         },
1209 
1210         true,  // has_toXYZD50, followed by 3x3 toXYZD50 matrix
1211         {{
1212             { 1,0,0 },
1213             { 0,1,0 },
1214             { 0,0,1 },
1215         }},
1216 
1217         false, // has_A2B, followed by a2b itself which we don't care about.
1218         {
1219             0,
1220             {
1221                 {{0, {0,0, 0,0,0,0,0}}},
1222                 {{0, {0,0, 0,0,0,0,0}}},
1223                 {{0, {0,0, 0,0,0,0,0}}},
1224                 {{0, {0,0, 0,0,0,0,0}}},
1225             },
1226             {0,0,0,0},
1227             nullptr,
1228             nullptr,
1229 
1230             0,
1231             {
1232                 {{0, {0,0, 0,0,0,0,0}}},
1233                 {{0, {0,0, 0,0,0,0,0}}},
1234                 {{0, {0,0, 0,0,0,0,0}}},
1235             },
1236             {{
1237                 { 0,0,0,0 },
1238                 { 0,0,0,0 },
1239                 { 0,0,0,0 },
1240             }},
1241 
1242             0,
1243             {
1244                 {{0, {0,0, 0,0,0,0,0}}},
1245                 {{0, {0,0, 0,0,0,0,0}}},
1246                 {{0, {0,0, 0,0,0,0,0}}},
1247             },
1248         },
1249     };
1250 
1251     return &XYZD50_profile;
1252 }
1253 
skcms_sRGB_TransferFunction()1254 const skcms_TransferFunction* skcms_sRGB_TransferFunction() {
1255     return &skcms_sRGB_profile()->trc[0].parametric;
1256 }
1257 
skcms_sRGB_Inverse_TransferFunction()1258 const skcms_TransferFunction* skcms_sRGB_Inverse_TransferFunction() {
1259     static const skcms_TransferFunction sRGB_inv =
1260         {0.416666657f, 1.137283325f, -0.0f, 12.920000076f, 0.003130805f, -0.054969788f, -0.0f};
1261     return &sRGB_inv;
1262 }
1263 
skcms_Identity_TransferFunction()1264 const skcms_TransferFunction* skcms_Identity_TransferFunction() {
1265     static const skcms_TransferFunction identity = {1,1,0,0,0,0,0};
1266     return &identity;
1267 }
1268 
1269 const uint8_t skcms_252_random_bytes[] = {
1270     8, 179, 128, 204, 253, 38, 134, 184, 68, 102, 32, 138, 99, 39, 169, 215,
1271     119, 26, 3, 223, 95, 239, 52, 132, 114, 74, 81, 234, 97, 116, 244, 205, 30,
1272     154, 173, 12, 51, 159, 122, 153, 61, 226, 236, 178, 229, 55, 181, 220, 191,
1273     194, 160, 126, 168, 82, 131, 18, 180, 245, 163, 22, 246, 69, 235, 252, 57,
1274     108, 14, 6, 152, 240, 255, 171, 242, 20, 227, 177, 238, 96, 85, 16, 211,
1275     70, 200, 149, 155, 146, 127, 145, 100, 151, 109, 19, 165, 208, 195, 164,
1276     137, 254, 182, 248, 64, 201, 45, 209, 5, 147, 207, 210, 113, 162, 83, 225,
1277     9, 31, 15, 231, 115, 37, 58, 53, 24, 49, 197, 56, 120, 172, 48, 21, 214,
1278     129, 111, 11, 50, 187, 196, 34, 60, 103, 71, 144, 47, 203, 77, 80, 232,
1279     140, 222, 250, 206, 166, 247, 139, 249, 221, 72, 106, 27, 199, 117, 54,
1280     219, 135, 118, 40, 79, 41, 251, 46, 93, 212, 92, 233, 148, 28, 121, 63,
1281     123, 158, 105, 59, 29, 42, 143, 23, 0, 107, 176, 87, 104, 183, 156, 193,
1282     189, 90, 188, 65, 190, 17, 198, 7, 186, 161, 1, 124, 78, 125, 170, 133,
1283     174, 218, 67, 157, 75, 101, 89, 217, 62, 33, 141, 228, 25, 35, 91, 230, 4,
1284     2, 13, 73, 86, 167, 237, 84, 243, 44, 185, 66, 130, 110, 150, 142, 216, 88,
1285     112, 36, 224, 136, 202, 76, 94, 98, 175, 213
1286 };
1287 
skcms_ApproximatelyEqualProfiles(const skcms_ICCProfile * A,const skcms_ICCProfile * B)1288 bool skcms_ApproximatelyEqualProfiles(const skcms_ICCProfile* A, const skcms_ICCProfile* B) {
1289     // Test for exactly equal profiles first.
1290     if (A == B || 0 == memcmp(A,B, sizeof(skcms_ICCProfile))) {
1291         return true;
1292     }
1293 
1294     // For now this is the essentially the same strategy we use in test_only.c
1295     // for our skcms_Transform() smoke tests:
1296     //    1) transform A to XYZD50
1297     //    2) transform B to XYZD50
1298     //    3) return true if they're similar enough
1299     // Our current criterion in 3) is maximum 1 bit error per XYZD50 byte.
1300 
1301     // skcms_252_random_bytes are 252 of a random shuffle of all possible bytes.
1302     // 252 is evenly divisible by 3 and 4.  Only 192, 10, 241, and 43 are missing.
1303 
1304     if (A->data_color_space != B->data_color_space) {
1305         return false;
1306     }
1307 
1308     // Interpret as RGB_888 if data color space is RGB or GRAY, RGBA_8888 if CMYK.
1309     // TODO: working with RGBA_8888 either way is probably fastest.
1310     skcms_PixelFormat fmt = skcms_PixelFormat_RGB_888;
1311     size_t npixels = 84;
1312     if (A->data_color_space == skcms_Signature_CMYK) {
1313         fmt = skcms_PixelFormat_RGBA_8888;
1314         npixels = 63;
1315     }
1316 
1317     // TODO: if A or B is a known profile (skcms_sRGB_profile, skcms_XYZD50_profile),
1318     // use pre-canned results and skip that skcms_Transform() call?
1319     uint8_t dstA[252],
1320             dstB[252];
1321     if (!skcms_Transform(
1322                 skcms_252_random_bytes,     fmt, skcms_AlphaFormat_Unpremul, A,
1323                 dstA, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, skcms_XYZD50_profile(),
1324                 npixels)) {
1325         return false;
1326     }
1327     if (!skcms_Transform(
1328                 skcms_252_random_bytes,     fmt, skcms_AlphaFormat_Unpremul, B,
1329                 dstB, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, skcms_XYZD50_profile(),
1330                 npixels)) {
1331         return false;
1332     }
1333 
1334     // TODO: make sure this final check has reasonable codegen.
1335     for (size_t i = 0; i < 252; i++) {
1336         if (abs((int)dstA[i] - (int)dstB[i]) > 1) {
1337             return false;
1338         }
1339     }
1340     return true;
1341 }
1342 
skcms_TRCs_AreApproximateInverse(const skcms_ICCProfile * profile,const skcms_TransferFunction * inv_tf)1343 bool skcms_TRCs_AreApproximateInverse(const skcms_ICCProfile* profile,
1344                                       const skcms_TransferFunction* inv_tf) {
1345     if (!profile || !profile->has_trc) {
1346         return false;
1347     }
1348 
1349     return skcms_AreApproximateInverses(&profile->trc[0], inv_tf) &&
1350            skcms_AreApproximateInverses(&profile->trc[1], inv_tf) &&
1351            skcms_AreApproximateInverses(&profile->trc[2], inv_tf);
1352 }
1353 
is_zero_to_one(float x)1354 static bool is_zero_to_one(float x) {
1355     return 0 <= x && x <= 1;
1356 }
1357 
1358 typedef struct { float vals[3]; } skcms_Vector3;
1359 
mv_mul(const skcms_Matrix3x3 * m,const skcms_Vector3 * v)1360 static skcms_Vector3 mv_mul(const skcms_Matrix3x3* m, const skcms_Vector3* v) {
1361     skcms_Vector3 dst = {{0,0,0}};
1362     for (int row = 0; row < 3; ++row) {
1363         dst.vals[row] = m->vals[row][0] * v->vals[0]
1364                       + m->vals[row][1] * v->vals[1]
1365                       + m->vals[row][2] * v->vals[2];
1366     }
1367     return dst;
1368 }
1369 
skcms_PrimariesToXYZD50(float rx,float ry,float gx,float gy,float bx,float by,float wx,float wy,skcms_Matrix3x3 * toXYZD50)1370 bool skcms_PrimariesToXYZD50(float rx, float ry,
1371                              float gx, float gy,
1372                              float bx, float by,
1373                              float wx, float wy,
1374                              skcms_Matrix3x3* toXYZD50) {
1375     if (!is_zero_to_one(rx) || !is_zero_to_one(ry) ||
1376         !is_zero_to_one(gx) || !is_zero_to_one(gy) ||
1377         !is_zero_to_one(bx) || !is_zero_to_one(by) ||
1378         !is_zero_to_one(wx) || !is_zero_to_one(wy) ||
1379         !toXYZD50) {
1380         return false;
1381     }
1382 
1383     // First, we need to convert xy values (primaries) to XYZ.
1384     skcms_Matrix3x3 primaries = {{
1385         { rx, gx, bx },
1386         { ry, gy, by },
1387         { 1 - rx - ry, 1 - gx - gy, 1 - bx - by },
1388     }};
1389     skcms_Matrix3x3 primaries_inv;
1390     if (!skcms_Matrix3x3_invert(&primaries, &primaries_inv)) {
1391         return false;
1392     }
1393 
1394     // Assumes that Y is 1.0f.
1395     skcms_Vector3 wXYZ = { { wx / wy, 1, (1 - wx - wy) / wy } };
1396     skcms_Vector3 XYZ = mv_mul(&primaries_inv, &wXYZ);
1397 
1398     skcms_Matrix3x3 toXYZ = {{
1399         { XYZ.vals[0],           0,           0 },
1400         {           0, XYZ.vals[1],           0 },
1401         {           0,           0, XYZ.vals[2] },
1402     }};
1403     toXYZ = skcms_Matrix3x3_concat(&primaries, &toXYZ);
1404 
1405     // Now convert toXYZ matrix to toXYZD50.
1406     skcms_Vector3 wXYZD50 = { { 0.96422f, 1.0f, 0.82521f } };
1407 
1408     // Calculate the chromatic adaptation matrix.  We will use the Bradford method, thus
1409     // the matrices below.  The Bradford method is used by Adobe and is widely considered
1410     // to be the best.
1411     skcms_Matrix3x3 xyz_to_lms = {{
1412         {  0.8951f,  0.2664f, -0.1614f },
1413         { -0.7502f,  1.7135f,  0.0367f },
1414         {  0.0389f, -0.0685f,  1.0296f },
1415     }};
1416     skcms_Matrix3x3 lms_to_xyz = {{
1417         {  0.9869929f, -0.1470543f, 0.1599627f },
1418         {  0.4323053f,  0.5183603f, 0.0492912f },
1419         { -0.0085287f,  0.0400428f, 0.9684867f },
1420     }};
1421 
1422     skcms_Vector3 srcCone = mv_mul(&xyz_to_lms, &wXYZ);
1423     skcms_Vector3 dstCone = mv_mul(&xyz_to_lms, &wXYZD50);
1424 
1425     skcms_Matrix3x3 DXtoD50 = {{
1426         { dstCone.vals[0] / srcCone.vals[0], 0, 0 },
1427         { 0, dstCone.vals[1] / srcCone.vals[1], 0 },
1428         { 0, 0, dstCone.vals[2] / srcCone.vals[2] },
1429     }};
1430     DXtoD50 = skcms_Matrix3x3_concat(&DXtoD50, &xyz_to_lms);
1431     DXtoD50 = skcms_Matrix3x3_concat(&lms_to_xyz, &DXtoD50);
1432 
1433     *toXYZD50 = skcms_Matrix3x3_concat(&DXtoD50, &toXYZ);
1434     return true;
1435 }
1436 
1437 
skcms_Matrix3x3_invert(const skcms_Matrix3x3 * src,skcms_Matrix3x3 * dst)1438 bool skcms_Matrix3x3_invert(const skcms_Matrix3x3* src, skcms_Matrix3x3* dst) {
1439     double a00 = src->vals[0][0],
1440            a01 = src->vals[1][0],
1441            a02 = src->vals[2][0],
1442            a10 = src->vals[0][1],
1443            a11 = src->vals[1][1],
1444            a12 = src->vals[2][1],
1445            a20 = src->vals[0][2],
1446            a21 = src->vals[1][2],
1447            a22 = src->vals[2][2];
1448 
1449     double b0 = a00*a11 - a01*a10,
1450            b1 = a00*a12 - a02*a10,
1451            b2 = a01*a12 - a02*a11,
1452            b3 = a20,
1453            b4 = a21,
1454            b5 = a22;
1455 
1456     double determinant = b0*b5
1457                        - b1*b4
1458                        + b2*b3;
1459 
1460     if (determinant == 0) {
1461         return false;
1462     }
1463 
1464     double invdet = 1.0 / determinant;
1465     if (invdet > +FLT_MAX || invdet < -FLT_MAX || !isfinitef_((float)invdet)) {
1466         return false;
1467     }
1468 
1469     b0 *= invdet;
1470     b1 *= invdet;
1471     b2 *= invdet;
1472     b3 *= invdet;
1473     b4 *= invdet;
1474     b5 *= invdet;
1475 
1476     dst->vals[0][0] = (float)( a11*b5 - a12*b4 );
1477     dst->vals[1][0] = (float)( a02*b4 - a01*b5 );
1478     dst->vals[2][0] = (float)(        +     b2 );
1479     dst->vals[0][1] = (float)( a12*b3 - a10*b5 );
1480     dst->vals[1][1] = (float)( a00*b5 - a02*b3 );
1481     dst->vals[2][1] = (float)(        -     b1 );
1482     dst->vals[0][2] = (float)( a10*b4 - a11*b3 );
1483     dst->vals[1][2] = (float)( a01*b3 - a00*b4 );
1484     dst->vals[2][2] = (float)(        +     b0 );
1485 
1486     for (int r = 0; r < 3; ++r)
1487     for (int c = 0; c < 3; ++c) {
1488         if (!isfinitef_(dst->vals[r][c])) {
1489             return false;
1490         }
1491     }
1492     return true;
1493 }
1494 
skcms_Matrix3x3_concat(const skcms_Matrix3x3 * A,const skcms_Matrix3x3 * B)1495 skcms_Matrix3x3 skcms_Matrix3x3_concat(const skcms_Matrix3x3* A, const skcms_Matrix3x3* B) {
1496     skcms_Matrix3x3 m = { { { 0,0,0 },{ 0,0,0 },{ 0,0,0 } } };
1497     for (int r = 0; r < 3; r++)
1498         for (int c = 0; c < 3; c++) {
1499             m.vals[r][c] = A->vals[r][0] * B->vals[0][c]
1500                          + A->vals[r][1] * B->vals[1][c]
1501                          + A->vals[r][2] * B->vals[2][c];
1502         }
1503     return m;
1504 }
1505 
1506 #if defined(__clang__)
1507     [[clang::no_sanitize("float-divide-by-zero")]]  // Checked for by classify() on the way out.
1508 #endif
skcms_TransferFunction_invert(const skcms_TransferFunction * src,skcms_TransferFunction * dst)1509 bool skcms_TransferFunction_invert(const skcms_TransferFunction* src, skcms_TransferFunction* dst) {
1510     TF_PQish  pq;
1511     TF_HLGish hlg;
1512     switch (classify(*src, &pq, &hlg)) {
1513         case Bad: return false;
1514         case sRGBish: break;  // handled below
1515 
1516         case PQish:
1517             *dst = { TFKind_marker(PQish), -pq.A,  pq.D, 1.0f/pq.F
1518                                          ,  pq.B, -pq.E, 1.0f/pq.C};
1519             return true;
1520 
1521         case HLGish:
1522             *dst = { TFKind_marker(HLGinvish), 1.0f/hlg.R, 1.0f/hlg.G
1523                                              , 1.0f/hlg.a, hlg.b, hlg.c, 0 };
1524             return true;
1525 
1526         case HLGinvish:
1527             *dst = { TFKind_marker(HLGish), 1.0f/hlg.R, 1.0f/hlg.G
1528                                           , 1.0f/hlg.a, hlg.b, hlg.c, 0 };
1529             return true;
1530     }
1531 
1532     assert (classify(*src) == sRGBish);
1533 
1534     // We're inverting this function, solving for x in terms of y.
1535     //   y = (cx + f)         x < d
1536     //       (ax + b)^g + e   x ≥ d
1537     // The inverse of this function can be expressed in the same piecewise form.
1538     skcms_TransferFunction inv = {0,0,0,0,0,0,0};
1539 
1540     // We'll start by finding the new threshold inv.d.
1541     // In principle we should be able to find that by solving for y at x=d from either side.
1542     // (If those two d values aren't the same, it's a discontinuous transfer function.)
1543     float d_l =       src->c * src->d + src->f,
1544           d_r = powf_(src->a * src->d + src->b, src->g) + src->e;
1545     if (fabsf_(d_l - d_r) > 1/512.0f) {
1546         return false;
1547     }
1548     inv.d = d_l;  // TODO(mtklein): better in practice to choose d_r?
1549 
1550     // When d=0, the linear section collapses to a point.  We leave c,d,f all zero in that case.
1551     if (inv.d > 0) {
1552         // Inverting the linear section is pretty straightfoward:
1553         //        y       = cx + f
1554         //        y - f   = cx
1555         //   (1/c)y - f/c = x
1556         inv.c =    1.0f/src->c;
1557         inv.f = -src->f/src->c;
1558     }
1559 
1560     // The interesting part is inverting the nonlinear section:
1561     //         y                = (ax + b)^g + e.
1562     //         y - e            = (ax + b)^g
1563     //        (y - e)^1/g       =  ax + b
1564     //        (y - e)^1/g - b   =  ax
1565     //   (1/a)(y - e)^1/g - b/a =   x
1566     //
1567     // To make that fit our form, we need to move the (1/a) term inside the exponentiation:
1568     //   let k = (1/a)^g
1569     //   (1/a)( y -  e)^1/g - b/a = x
1570     //        (ky - ke)^1/g - b/a = x
1571 
1572     float k = powf_(src->a, -src->g);  // (1/a)^g == a^-g
1573     inv.g = 1.0f / src->g;
1574     inv.a = k;
1575     inv.b = -k * src->e;
1576     inv.e = -src->b / src->a;
1577 
1578     // We need to enforce the same constraints here that we do when fitting a curve,
1579     // a >= 0 and ad+b >= 0.  These constraints are checked by classify(), so they're true
1580     // of the source function if we're here.
1581 
1582     // Just like when fitting the curve, there's really no way to rescue a < 0.
1583     if (inv.a < 0) {
1584         return false;
1585     }
1586     // On the other hand we can rescue an ad+b that's gone slightly negative here.
1587     if (inv.a * inv.d + inv.b < 0) {
1588         inv.b = -inv.a * inv.d;
1589     }
1590 
1591     // That should usually make classify(inv) == sRGBish true, but there are a couple situations
1592     // where we might still fail here, like non-finite parameter values.
1593     if (classify(inv) != sRGBish) {
1594         return false;
1595     }
1596 
1597     assert (inv.a >= 0);
1598     assert (inv.a * inv.d + inv.b >= 0);
1599 
1600     // Now in principle we're done.
1601     // But to preserve the valuable invariant inv(src(1.0f)) == 1.0f, we'll tweak
1602     // e or f of the inverse, depending on which segment contains src(1.0f).
1603     float s = skcms_TransferFunction_eval(src, 1.0f);
1604     if (!isfinitef_(s)) {
1605         return false;
1606     }
1607 
1608     float sign = s < 0 ? -1.0f : 1.0f;
1609     s *= sign;
1610     if (s < inv.d) {
1611         inv.f = 1.0f - sign * inv.c * s;
1612     } else {
1613         inv.e = 1.0f - sign * powf_(inv.a * s + inv.b, inv.g);
1614     }
1615 
1616     *dst = inv;
1617     return classify(*dst) == sRGBish;
1618 }
1619 
1620 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ //
1621 
1622 // From here below we're approximating an skcms_Curve with an skcms_TransferFunction{g,a,b,c,d,e,f}:
1623 //
1624 //   tf(x) =  cx + f          x < d
1625 //   tf(x) = (ax + b)^g + e   x ≥ d
1626 //
1627 // When fitting, we add the additional constraint that both pieces meet at d:
1628 //
1629 //   cd + f = (ad + b)^g + e
1630 //
1631 // Solving for e and folding it through gives an alternate formulation of the non-linear piece:
1632 //
1633 //   tf(x) =                           cx + f   x < d
1634 //   tf(x) = (ax + b)^g - (ad + b)^g + cd + f   x ≥ d
1635 //
1636 // Our overall strategy is then:
1637 //    For a couple tolerances,
1638 //       - fit_linear():    fit c,d,f iteratively to as many points as our tolerance allows
1639 //       - invert c,d,f
1640 //       - fit_nonlinear(): fit g,a,b using Gauss-Newton given those inverted c,d,f
1641 //                          (and by constraint, inverted e) to the inverse of the table.
1642 //    Return the parameters with least maximum error.
1643 //
1644 // To run Gauss-Newton to find g,a,b, we'll also need the gradient of the residuals
1645 // of round-trip f_inv(x), the inverse of the non-linear piece of f(x).
1646 //
1647 //    let y = Table(x)
1648 //    r(x) = x - f_inv(y)
1649 //
1650 //    ∂r/∂g = ln(ay + b)*(ay + b)^g
1651 //          - ln(ad + b)*(ad + b)^g
1652 //    ∂r/∂a = yg(ay + b)^(g-1)
1653 //          - dg(ad + b)^(g-1)
1654 //    ∂r/∂b =  g(ay + b)^(g-1)
1655 //          -  g(ad + b)^(g-1)
1656 
1657 // Return the residual of roundtripping skcms_Curve(x) through f_inv(y) with parameters P,
1658 // and fill out the gradient of the residual into dfdP.
rg_nonlinear(float x,const skcms_Curve * curve,const skcms_TransferFunction * tf,const float P[3],float dfdP[3])1659 static float rg_nonlinear(float x,
1660                           const skcms_Curve* curve,
1661                           const skcms_TransferFunction* tf,
1662                           const float P[3],
1663                           float dfdP[3]) {
1664     const float y = eval_curve(curve, x);
1665 
1666     const float g = P[0],  a = P[1],  b = P[2],
1667                 c = tf->c, d = tf->d, f = tf->f;
1668 
1669     const float Y = fmaxf_(a*y + b, 0.0f),
1670                 D =        a*d + b;
1671     assert (D >= 0);
1672 
1673     // The gradient.
1674     dfdP[0] = logf_(Y)*powf_(Y, g)
1675             - logf_(D)*powf_(D, g);
1676     dfdP[1] = y*g*powf_(Y, g-1)
1677             - d*g*powf_(D, g-1);
1678     dfdP[2] =   g*powf_(Y, g-1)
1679             -   g*powf_(D, g-1);
1680 
1681     // The residual.
1682     const float f_inv = powf_(Y, g)
1683                       - powf_(D, g)
1684                       + c*d + f;
1685     return x - f_inv;
1686 }
1687 
gauss_newton_step(const skcms_Curve * curve,const skcms_TransferFunction * tf,float P[3],float x0,float dx,int N)1688 static bool gauss_newton_step(const skcms_Curve* curve,
1689                               const skcms_TransferFunction* tf,
1690                               float P[3],
1691                               float x0, float dx, int N) {
1692     // We'll sample x from the range [x0,x1] (both inclusive) N times with even spacing.
1693     //
1694     // We want to do P' = P + (Jf^T Jf)^-1 Jf^T r(P),
1695     //   where r(P) is the residual vector
1696     //   and Jf is the Jacobian matrix of f(), ∂r/∂P.
1697     //
1698     // Let's review the shape of each of these expressions:
1699     //   r(P)   is [N x 1], a column vector with one entry per value of x tested
1700     //   Jf     is [N x 3], a matrix with an entry for each (x,P) pair
1701     //   Jf^T   is [3 x N], the transpose of Jf
1702     //
1703     //   Jf^T Jf   is [3 x N] * [N x 3] == [3 x 3], a 3x3 matrix,
1704     //                                              and so is its inverse (Jf^T Jf)^-1
1705     //   Jf^T r(P) is [3 x N] * [N x 1] == [3 x 1], a column vector with the same shape as P
1706     //
1707     // Our implementation strategy to get to the final ∆P is
1708     //   1) evaluate Jf^T Jf,   call that lhs
1709     //   2) evaluate Jf^T r(P), call that rhs
1710     //   3) invert lhs
1711     //   4) multiply inverse lhs by rhs
1712     //
1713     // This is a friendly implementation strategy because we don't have to have any
1714     // buffers that scale with N, and equally nice don't have to perform any matrix
1715     // operations that are variable size.
1716     //
1717     // Other implementation strategies could trade this off, e.g. evaluating the
1718     // pseudoinverse of Jf ( (Jf^T Jf)^-1 Jf^T ) directly, then multiplying that by
1719     // the residuals.  That would probably require implementing singular value
1720     // decomposition, and would create a [3 x N] matrix to be multiplied by the
1721     // [N x 1] residual vector, but on the upside I think that'd eliminate the
1722     // possibility of this gauss_newton_step() function ever failing.
1723 
1724     // 0) start off with lhs and rhs safely zeroed.
1725     skcms_Matrix3x3 lhs = {{ {0,0,0}, {0,0,0}, {0,0,0} }};
1726     skcms_Vector3   rhs = {  {0,0,0} };
1727 
1728     // 1,2) evaluate lhs and evaluate rhs
1729     //   We want to evaluate Jf only once, but both lhs and rhs involve Jf^T,
1730     //   so we'll have to update lhs and rhs at the same time.
1731     for (int i = 0; i < N; i++) {
1732         float x = x0 + i*dx;
1733 
1734         float dfdP[3] = {0,0,0};
1735         float resid = rg_nonlinear(x,curve,tf,P, dfdP);
1736 
1737         for (int r = 0; r < 3; r++) {
1738             for (int c = 0; c < 3; c++) {
1739                 lhs.vals[r][c] += dfdP[r] * dfdP[c];
1740             }
1741             rhs.vals[r] += dfdP[r] * resid;
1742         }
1743     }
1744 
1745     // If any of the 3 P parameters are unused, this matrix will be singular.
1746     // Detect those cases and fix them up to indentity instead, so we can invert.
1747     for (int k = 0; k < 3; k++) {
1748         if (lhs.vals[0][k]==0 && lhs.vals[1][k]==0 && lhs.vals[2][k]==0 &&
1749             lhs.vals[k][0]==0 && lhs.vals[k][1]==0 && lhs.vals[k][2]==0) {
1750             lhs.vals[k][k] = 1;
1751         }
1752     }
1753 
1754     // 3) invert lhs
1755     skcms_Matrix3x3 lhs_inv;
1756     if (!skcms_Matrix3x3_invert(&lhs, &lhs_inv)) {
1757         return false;
1758     }
1759 
1760     // 4) multiply inverse lhs by rhs
1761     skcms_Vector3 dP = mv_mul(&lhs_inv, &rhs);
1762     P[0] += dP.vals[0];
1763     P[1] += dP.vals[1];
1764     P[2] += dP.vals[2];
1765     return isfinitef_(P[0]) && isfinitef_(P[1]) && isfinitef_(P[2]);
1766 }
1767 
1768 
1769 // Fit the points in [L,N) to the non-linear piece of tf, or return false if we can't.
fit_nonlinear(const skcms_Curve * curve,int L,int N,skcms_TransferFunction * tf)1770 static bool fit_nonlinear(const skcms_Curve* curve, int L, int N, skcms_TransferFunction* tf) {
1771     float P[3] = { tf->g, tf->a, tf->b };
1772 
1773     // No matter where we start, dx should always represent N even steps from 0 to 1.
1774     const float dx = 1.0f / (N-1);
1775 
1776     // As far as we can tell, 1 Gauss-Newton step won't converge, and 3 steps is no better than 2.
1777     for (int j = 0; j < 2; j++) {
1778         // These extra constraints a >= 0 and ad+b >= 0 are not modeled in the optimization.
1779         // We don't really know how to fix up a if it goes negative.
1780         if (P[1] < 0) {
1781             return false;
1782         }
1783         // If ad+b goes negative, we feel just barely not uneasy enough to tweak b so ad+b is zero.
1784         if (P[1] * tf->d + P[2] < 0) {
1785             P[2] = -P[1] * tf->d;
1786         }
1787         assert (P[1] >= 0 &&
1788                 P[1] * tf->d + P[2] >= 0);
1789 
1790         if (!gauss_newton_step(curve, tf,
1791                                P,
1792                                L*dx, dx, N-L)) {
1793             return false;
1794         }
1795     }
1796 
1797     // We need to apply our fixups one last time
1798     if (P[1] < 0) {
1799         return false;
1800     }
1801     if (P[1] * tf->d + P[2] < 0) {
1802         P[2] = -P[1] * tf->d;
1803     }
1804 
1805     assert (P[1] >= 0 &&
1806             P[1] * tf->d + P[2] >= 0);
1807 
1808     tf->g = P[0];
1809     tf->a = P[1];
1810     tf->b = P[2];
1811     tf->e =   tf->c*tf->d + tf->f
1812       - powf_(tf->a*tf->d + tf->b, tf->g);
1813     return true;
1814 }
1815 
skcms_ApproximateCurve(const skcms_Curve * curve,skcms_TransferFunction * approx,float * max_error)1816 bool skcms_ApproximateCurve(const skcms_Curve* curve,
1817                             skcms_TransferFunction* approx,
1818                             float* max_error) {
1819     if (!curve || !approx || !max_error) {
1820         return false;
1821     }
1822 
1823     if (curve->table_entries == 0) {
1824         // No point approximating an skcms_TransferFunction with an skcms_TransferFunction!
1825         return false;
1826     }
1827 
1828     if (curve->table_entries == 1 || curve->table_entries > (uint32_t)INT_MAX) {
1829         // We need at least two points, and must put some reasonable cap on the maximum number.
1830         return false;
1831     }
1832 
1833     int N = (int)curve->table_entries;
1834     const float dx = 1.0f / (N - 1);
1835 
1836     *max_error = INFINITY_;
1837     const float kTolerances[] = { 1.5f / 65535.0f, 1.0f / 512.0f };
1838     for (int t = 0; t < ARRAY_COUNT(kTolerances); t++) {
1839         skcms_TransferFunction tf,
1840                                tf_inv;
1841 
1842         // It's problematic to fit curves with non-zero f, so always force it to zero explicitly.
1843         tf.f = 0.0f;
1844         int L = fit_linear(curve, N, kTolerances[t], &tf.c, &tf.d);
1845 
1846         if (L == N) {
1847             // If the entire data set was linear, move the coefficients to the nonlinear portion
1848             // with G == 1.  This lets use a canonical representation with d == 0.
1849             tf.g = 1;
1850             tf.a = tf.c;
1851             tf.b = tf.f;
1852             tf.c = tf.d = tf.e = tf.f = 0;
1853         } else if (L == N - 1) {
1854             // Degenerate case with only two points in the nonlinear segment. Solve directly.
1855             tf.g = 1;
1856             tf.a = (eval_curve(curve, (N-1)*dx) -
1857                     eval_curve(curve, (N-2)*dx))
1858                  / dx;
1859             tf.b = eval_curve(curve, (N-2)*dx)
1860                  - tf.a * (N-2)*dx;
1861             tf.e = 0;
1862         } else {
1863             // Start by guessing a gamma-only curve through the midpoint.
1864             int mid = (L + N) / 2;
1865             float mid_x = mid / (N - 1.0f);
1866             float mid_y = eval_curve(curve, mid_x);
1867             tf.g = log2f_(mid_y) / log2f_(mid_x);
1868             tf.a = 1;
1869             tf.b = 0;
1870             tf.e =    tf.c*tf.d + tf.f
1871               - powf_(tf.a*tf.d + tf.b, tf.g);
1872 
1873 
1874             if (!skcms_TransferFunction_invert(&tf, &tf_inv) ||
1875                 !fit_nonlinear(curve, L,N, &tf_inv)) {
1876                 continue;
1877             }
1878 
1879             // We fit tf_inv, so calculate tf to keep in sync.
1880             if (!skcms_TransferFunction_invert(&tf_inv, &tf)) {
1881                 continue;
1882             }
1883         }
1884 
1885         // We'd better have a sane, sRGB-ish TF by now.
1886         // Other non-Bad TFs would be fine, but we know we've only ever tried to fit sRGBish;
1887         // anything else is just some accident of math and the way we pun tf.g as a type flag.
1888         if (sRGBish != classify(tf)) {
1889             continue;
1890         }
1891 
1892         // We find our error by roundtripping the table through tf_inv.
1893         //
1894         // (The most likely use case for this approximation is to be inverted and
1895         // used as the transfer function for a destination color space.)
1896         //
1897         // We've kept tf and tf_inv in sync above, but we can't guarantee that tf is
1898         // invertible, so re-verify that here (and use the new inverse for testing).
1899         if (!skcms_TransferFunction_invert(&tf, &tf_inv)) {
1900             continue;
1901         }
1902 
1903         float err = max_roundtrip_error(curve, &tf_inv);
1904         if (*max_error > err) {
1905             *max_error = err;
1906             *approx    = tf;
1907         }
1908     }
1909     return isfinitef_(*max_error);
1910 }
1911 
1912 // ~~~~ Impl. of skcms_Transform() ~~~~
1913 
1914 typedef enum {
1915     Op_load_a8,
1916     Op_load_g8,
1917     Op_load_8888_palette8,
1918     Op_load_4444,
1919     Op_load_565,
1920     Op_load_888,
1921     Op_load_8888,
1922     Op_load_1010102,
1923     Op_load_161616LE,
1924     Op_load_16161616LE,
1925     Op_load_161616BE,
1926     Op_load_16161616BE,
1927     Op_load_hhh,
1928     Op_load_hhhh,
1929     Op_load_fff,
1930     Op_load_ffff,
1931 
1932     Op_swap_rb,
1933     Op_clamp,
1934     Op_invert,
1935     Op_force_opaque,
1936     Op_premul,
1937     Op_unpremul,
1938     Op_matrix_3x3,
1939     Op_matrix_3x4,
1940     Op_lab_to_xyz,
1941 
1942     Op_tf_r,
1943     Op_tf_g,
1944     Op_tf_b,
1945     Op_tf_a,
1946 
1947     Op_pq_r,
1948     Op_pq_g,
1949     Op_pq_b,
1950     Op_pq_a,
1951 
1952     Op_hlg_r,
1953     Op_hlg_g,
1954     Op_hlg_b,
1955     Op_hlg_a,
1956 
1957     Op_hlginv_r,
1958     Op_hlginv_g,
1959     Op_hlginv_b,
1960     Op_hlginv_a,
1961 
1962     Op_table_r,
1963     Op_table_g,
1964     Op_table_b,
1965     Op_table_a,
1966 
1967     Op_clut,
1968 
1969     Op_store_a8,
1970     Op_store_g8,
1971     Op_store_4444,
1972     Op_store_565,
1973     Op_store_888,
1974     Op_store_8888,
1975     Op_store_1010102,
1976     Op_store_161616LE,
1977     Op_store_16161616LE,
1978     Op_store_161616BE,
1979     Op_store_16161616BE,
1980     Op_store_hhh,
1981     Op_store_hhhh,
1982     Op_store_fff,
1983     Op_store_ffff,
1984 } Op;
1985 
1986 #if defined(__clang__)
1987     template <int N, typename T> using Vec = T __attribute__((ext_vector_type(N)));
1988 #elif defined(__GNUC__)
1989     // For some reason GCC accepts this nonsense, but not the more straightforward version,
1990     //   template <int N, typename T> using Vec = T __attribute__((vector_size(N*sizeof(T))));
1991     template <int N, typename T>
1992     struct VecHelper { typedef T __attribute__((vector_size(N*sizeof(T)))) V; };
1993 
1994     template <int N, typename T> using Vec = typename VecHelper<N,T>::V;
1995 #endif
1996 
1997 // First, instantiate our default exec_ops() implementation using the default compiliation target.
1998 
1999 namespace baseline {
2000 #if defined(SKCMS_PORTABLE) || !(defined(__clang__) || defined(__GNUC__)) \
2001                             || (defined(__EMSCRIPTEN_major__) && !defined(__wasm_simd128__))
2002     #define N 1
2003     template <typename T> using V = T;
2004     using Color = float;
2005 #elif defined(__AVX512F__)
2006     #define N 16
2007     template <typename T> using V = Vec<N,T>;
2008     using Color = float;
2009 #elif defined(__AVX__)
2010     #define N 8
2011     template <typename T> using V = Vec<N,T>;
2012     using Color = float;
2013 #elif defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(SKCMS_OPT_INTO_NEON_FP16)
2014     #define N 8
2015     template <typename T> using V = Vec<N,T>;
2016     using Color = _Float16;
2017 #else
2018     #define N 4
2019     template <typename T> using V = Vec<N,T>;
2020     using Color = float;
2021 #endif
2022 
2023     #include "src/Transform_inl.h"
2024     #undef N
2025 }
2026 
2027 // Now, instantiate any other versions of run_program() we may want for runtime detection.
2028 #if !defined(SKCMS_PORTABLE) &&                           \
2029     !defined(SKCMS_NO_RUNTIME_CPU_DETECTION) &&           \
2030         (( defined(__clang__) && __clang_major__ >= 5) || \
2031          (!defined(__clang__) && defined(__GNUC__)))      \
2032      && defined(__x86_64__)
2033 
2034     #if !defined(__AVX2__)
2035         #if defined(__clang__)
2036             #pragma clang attribute push(__attribute__((target("avx2,f16c"))), apply_to=function)
2037         #elif defined(__GNUC__)
2038             #pragma GCC push_options
2039             #pragma GCC target("avx2,f16c")
2040         #endif
2041 
2042         namespace hsw {
2043             #define USING_AVX
2044             #define USING_AVX_F16C
2045             #define USING_AVX2
2046             #define N 8
2047             template <typename T> using V = Vec<N,T>;
2048             using Color = float;
2049 
2050             #include "src/Transform_inl.h"
2051 
2052             // src/Transform_inl.h will undefine USING_* for us.
2053             #undef N
2054         }
2055 
2056         #if defined(__clang__)
2057             #pragma clang attribute pop
2058         #elif defined(__GNUC__)
2059             #pragma GCC pop_options
2060         #endif
2061 
2062         #define TEST_FOR_HSW
2063     #endif
2064 
2065     #if !defined(__AVX512F__)
2066         #if defined(__clang__)
2067             #pragma clang attribute push(__attribute__((target("avx512f,avx512dq,avx512cd,avx512bw,avx512vl"))), apply_to=function)
2068         #elif defined(__GNUC__)
2069             #pragma GCC push_options
2070             #pragma GCC target("avx512f,avx512dq,avx512cd,avx512bw,avx512vl")
2071         #endif
2072 
2073         namespace skx {
2074             #define USING_AVX512F
2075             #define N 16
2076             template <typename T> using V = Vec<N,T>;
2077             using Color = float;
2078 
2079             #include "src/Transform_inl.h"
2080 
2081             // src/Transform_inl.h will undefine USING_* for us.
2082             #undef N
2083         }
2084 
2085         #if defined(__clang__)
2086             #pragma clang attribute pop
2087         #elif defined(__GNUC__)
2088             #pragma GCC pop_options
2089         #endif
2090 
2091         #define TEST_FOR_SKX
2092     #endif
2093 
2094     #if defined(TEST_FOR_HSW) || defined(TEST_FOR_SKX)
2095         enum class CpuType { None, HSW, SKX };
cpu_type()2096         static CpuType cpu_type() {
2097             static const CpuType type = []{
2098                 // See http://www.sandpile.org/x86/cpuid.htm
2099 
2100                 // First, a basic cpuid(1) lets us check prerequisites for HSW, SKX.
2101                 uint32_t eax, ebx, ecx, edx;
2102                 __asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
2103                                              : "0"(1), "2"(0));
2104                 if ((edx & (1u<<25)) &&  // SSE
2105                     (edx & (1u<<26)) &&  // SSE2
2106                     (ecx & (1u<< 0)) &&  // SSE3
2107                     (ecx & (1u<< 9)) &&  // SSSE3
2108                     (ecx & (1u<<12)) &&  // FMA (N.B. not used, avoided even)
2109                     (ecx & (1u<<19)) &&  // SSE4.1
2110                     (ecx & (1u<<20)) &&  // SSE4.2
2111                     (ecx & (1u<<26)) &&  // XSAVE
2112                     (ecx & (1u<<27)) &&  // OSXSAVE
2113                     (ecx & (1u<<28)) &&  // AVX
2114                     (ecx & (1u<<29))) {  // F16C
2115 
2116                     // Call cpuid(7) to check for AVX2 and AVX-512 bits.
2117                     __asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
2118                                                  : "0"(7), "2"(0));
2119                     // eax from xgetbv(0) will tell us whether XMM, YMM, and ZMM state is saved.
2120                     uint32_t xcr0, dont_need_edx;
2121                     __asm__ __volatile__("xgetbv" : "=a"(xcr0), "=d"(dont_need_edx) : "c"(0));
2122 
2123                     if ((xcr0 & (1u<<1)) &&  // XMM register state saved?
2124                         (xcr0 & (1u<<2)) &&  // YMM register state saved?
2125                         (ebx  & (1u<<5))) {  // AVX2
2126                         // At this point we're at least HSW.  Continue checking for SKX.
2127                         if ((xcr0 & (1u<< 5)) && // Opmasks state saved?
2128                             (xcr0 & (1u<< 6)) && // First 16 ZMM registers saved?
2129                             (xcr0 & (1u<< 7)) && // High 16 ZMM registers saved?
2130                             (ebx  & (1u<<16)) && // AVX512F
2131                             (ebx  & (1u<<17)) && // AVX512DQ
2132                             (ebx  & (1u<<28)) && // AVX512CD
2133                             (ebx  & (1u<<30)) && // AVX512BW
2134                             (ebx  & (1u<<31))) { // AVX512VL
2135                             return CpuType::SKX;
2136                         }
2137                         return CpuType::HSW;
2138                     }
2139                 }
2140                 return CpuType::None;
2141             }();
2142             return type;
2143         }
2144     #endif
2145 
2146 #endif
2147 
2148 typedef struct {
2149     Op          op;
2150     const void* arg;
2151 } OpAndArg;
2152 
select_curve_op(const skcms_Curve * curve,int channel)2153 static OpAndArg select_curve_op(const skcms_Curve* curve, int channel) {
2154     static const struct { Op sRGBish, PQish, HLGish, HLGinvish, table; } ops[] = {
2155         { Op_tf_r, Op_pq_r, Op_hlg_r, Op_hlginv_r, Op_table_r },
2156         { Op_tf_g, Op_pq_g, Op_hlg_g, Op_hlginv_g, Op_table_g },
2157         { Op_tf_b, Op_pq_b, Op_hlg_b, Op_hlginv_b, Op_table_b },
2158         { Op_tf_a, Op_pq_a, Op_hlg_a, Op_hlginv_a, Op_table_a },
2159     };
2160     const auto& op = ops[channel];
2161 
2162     if (curve->table_entries == 0) {
2163         const OpAndArg noop = { Op_load_a8/*doesn't matter*/, nullptr };
2164 
2165         const skcms_TransferFunction& tf = curve->parametric;
2166 
2167         if (tf.g == 1 && tf.a == 1 &&
2168             tf.b == 0 && tf.c == 0 && tf.d == 0 && tf.e == 0 && tf.f == 0) {
2169             return noop;
2170         }
2171 
2172         switch (classify(tf)) {
2173             case Bad:        return noop;
2174             case sRGBish:    return OpAndArg{op.sRGBish,   &tf};
2175             case PQish:      return OpAndArg{op.PQish,     &tf};
2176             case HLGish:     return OpAndArg{op.HLGish,    &tf};
2177             case HLGinvish:  return OpAndArg{op.HLGinvish, &tf};
2178         }
2179     }
2180     return OpAndArg{op.table, curve};
2181 }
2182 
bytes_per_pixel(skcms_PixelFormat fmt)2183 static size_t bytes_per_pixel(skcms_PixelFormat fmt) {
2184     switch (fmt >> 1) {   // ignore rgb/bgr
2185         case skcms_PixelFormat_A_8                >> 1: return  1;
2186         case skcms_PixelFormat_G_8                >> 1: return  1;
2187         case skcms_PixelFormat_RGBA_8888_Palette8 >> 1: return  1;
2188         case skcms_PixelFormat_ABGR_4444          >> 1: return  2;
2189         case skcms_PixelFormat_RGB_565            >> 1: return  2;
2190         case skcms_PixelFormat_RGB_888            >> 1: return  3;
2191         case skcms_PixelFormat_RGBA_8888          >> 1: return  4;
2192         case skcms_PixelFormat_RGBA_1010102       >> 1: return  4;
2193         case skcms_PixelFormat_RGB_161616LE       >> 1: return  6;
2194         case skcms_PixelFormat_RGBA_16161616LE    >> 1: return  8;
2195         case skcms_PixelFormat_RGB_161616BE       >> 1: return  6;
2196         case skcms_PixelFormat_RGBA_16161616BE    >> 1: return  8;
2197         case skcms_PixelFormat_RGB_hhh_Norm       >> 1: return  6;
2198         case skcms_PixelFormat_RGBA_hhhh_Norm     >> 1: return  8;
2199         case skcms_PixelFormat_RGB_hhh            >> 1: return  6;
2200         case skcms_PixelFormat_RGBA_hhhh          >> 1: return  8;
2201         case skcms_PixelFormat_RGB_fff            >> 1: return 12;
2202         case skcms_PixelFormat_RGBA_ffff          >> 1: return 16;
2203     }
2204     assert(false);
2205     return 0;
2206 }
2207 
prep_for_destination(const skcms_ICCProfile * profile,skcms_Matrix3x3 * fromXYZD50,skcms_TransferFunction * invR,skcms_TransferFunction * invG,skcms_TransferFunction * invB)2208 static bool prep_for_destination(const skcms_ICCProfile* profile,
2209                                  skcms_Matrix3x3* fromXYZD50,
2210                                  skcms_TransferFunction* invR,
2211                                  skcms_TransferFunction* invG,
2212                                  skcms_TransferFunction* invB) {
2213     // We only support destinations with parametric transfer functions
2214     // and with gamuts that can be transformed from XYZD50.
2215     return profile->has_trc
2216         && profile->has_toXYZD50
2217         && profile->trc[0].table_entries == 0
2218         && profile->trc[1].table_entries == 0
2219         && profile->trc[2].table_entries == 0
2220         && skcms_TransferFunction_invert(&profile->trc[0].parametric, invR)
2221         && skcms_TransferFunction_invert(&profile->trc[1].parametric, invG)
2222         && skcms_TransferFunction_invert(&profile->trc[2].parametric, invB)
2223         && skcms_Matrix3x3_invert(&profile->toXYZD50, fromXYZD50);
2224 }
2225 
skcms_Transform(const void * src,skcms_PixelFormat srcFmt,skcms_AlphaFormat srcAlpha,const skcms_ICCProfile * srcProfile,void * dst,skcms_PixelFormat dstFmt,skcms_AlphaFormat dstAlpha,const skcms_ICCProfile * dstProfile,size_t npixels)2226 bool skcms_Transform(const void*             src,
2227                      skcms_PixelFormat       srcFmt,
2228                      skcms_AlphaFormat       srcAlpha,
2229                      const skcms_ICCProfile* srcProfile,
2230                      void*                   dst,
2231                      skcms_PixelFormat       dstFmt,
2232                      skcms_AlphaFormat       dstAlpha,
2233                      const skcms_ICCProfile* dstProfile,
2234                      size_t                  npixels) {
2235     return skcms_TransformWithPalette(src, srcFmt, srcAlpha, srcProfile,
2236                                       dst, dstFmt, dstAlpha, dstProfile,
2237                                       npixels, nullptr);
2238 }
2239 
skcms_TransformWithPalette(const void * src,skcms_PixelFormat srcFmt,skcms_AlphaFormat srcAlpha,const skcms_ICCProfile * srcProfile,void * dst,skcms_PixelFormat dstFmt,skcms_AlphaFormat dstAlpha,const skcms_ICCProfile * dstProfile,size_t nz,const void * palette)2240 bool skcms_TransformWithPalette(const void*             src,
2241                                 skcms_PixelFormat       srcFmt,
2242                                 skcms_AlphaFormat       srcAlpha,
2243                                 const skcms_ICCProfile* srcProfile,
2244                                 void*                   dst,
2245                                 skcms_PixelFormat       dstFmt,
2246                                 skcms_AlphaFormat       dstAlpha,
2247                                 const skcms_ICCProfile* dstProfile,
2248                                 size_t                  nz,
2249                                 const void*             palette) {
2250     const size_t dst_bpp = bytes_per_pixel(dstFmt),
2251                  src_bpp = bytes_per_pixel(srcFmt);
2252     // Let's just refuse if the request is absurdly big.
2253     if (nz * dst_bpp > INT_MAX || nz * src_bpp > INT_MAX) {
2254         return false;
2255     }
2256     int n = (int)nz;
2257 
2258     // Null profiles default to sRGB. Passing null for both is handy when doing format conversion.
2259     if (!srcProfile) {
2260         srcProfile = skcms_sRGB_profile();
2261     }
2262     if (!dstProfile) {
2263         dstProfile = skcms_sRGB_profile();
2264     }
2265 
2266     // We can't transform in place unless the PixelFormats are the same size.
2267     if (dst == src && dst_bpp != src_bpp) {
2268         return false;
2269     }
2270     // TODO: more careful alias rejection (like, dst == src + 1)?
2271 
2272     if (needs_palette(srcFmt) && !palette) {
2273         return false;
2274     }
2275 
2276     Op          program  [32];
2277     const void* arguments[32];
2278 
2279     Op*          ops  = program;
2280     const void** args = arguments;
2281 
2282     // These are always parametric curves of some sort.
2283     skcms_Curve dst_curves[3];
2284     dst_curves[0].table_entries =
2285     dst_curves[1].table_entries =
2286     dst_curves[2].table_entries = 0;
2287 
2288     skcms_Matrix3x3        from_xyz;
2289 
2290     switch (srcFmt >> 1) {
2291         default: return false;
2292         case skcms_PixelFormat_A_8             >> 1: *ops++ = Op_load_a8;         break;
2293         case skcms_PixelFormat_G_8             >> 1: *ops++ = Op_load_g8;         break;
2294         case skcms_PixelFormat_ABGR_4444       >> 1: *ops++ = Op_load_4444;       break;
2295         case skcms_PixelFormat_RGB_565         >> 1: *ops++ = Op_load_565;        break;
2296         case skcms_PixelFormat_RGB_888         >> 1: *ops++ = Op_load_888;        break;
2297         case skcms_PixelFormat_RGBA_8888       >> 1: *ops++ = Op_load_8888;       break;
2298         case skcms_PixelFormat_RGBA_1010102    >> 1: *ops++ = Op_load_1010102;    break;
2299         case skcms_PixelFormat_RGB_161616LE    >> 1: *ops++ = Op_load_161616LE;   break;
2300         case skcms_PixelFormat_RGBA_16161616LE >> 1: *ops++ = Op_load_16161616LE; break;
2301         case skcms_PixelFormat_RGB_161616BE    >> 1: *ops++ = Op_load_161616BE;   break;
2302         case skcms_PixelFormat_RGBA_16161616BE >> 1: *ops++ = Op_load_16161616BE; break;
2303         case skcms_PixelFormat_RGB_hhh_Norm    >> 1: *ops++ = Op_load_hhh;        break;
2304         case skcms_PixelFormat_RGBA_hhhh_Norm  >> 1: *ops++ = Op_load_hhhh;       break;
2305         case skcms_PixelFormat_RGB_hhh         >> 1: *ops++ = Op_load_hhh;        break;
2306         case skcms_PixelFormat_RGBA_hhhh       >> 1: *ops++ = Op_load_hhhh;       break;
2307         case skcms_PixelFormat_RGB_fff         >> 1: *ops++ = Op_load_fff;        break;
2308         case skcms_PixelFormat_RGBA_ffff       >> 1: *ops++ = Op_load_ffff;       break;
2309 
2310         case skcms_PixelFormat_RGBA_8888_Palette8 >> 1: *ops++  = Op_load_8888_palette8;
2311                                                         *args++ = palette;
2312                                                         break;
2313     }
2314     if (srcFmt == skcms_PixelFormat_RGB_hhh_Norm ||
2315         srcFmt == skcms_PixelFormat_RGBA_hhhh_Norm) {
2316         *ops++ = Op_clamp;
2317     }
2318     if (srcFmt & 1) {
2319         *ops++ = Op_swap_rb;
2320     }
2321     skcms_ICCProfile gray_dst_profile;
2322     if ((dstFmt >> 1) == (skcms_PixelFormat_G_8 >> 1)) {
2323         // When transforming to gray, stop at XYZ (by setting toXYZ to identity), then transform
2324         // luminance (Y) by the destination transfer function.
2325         gray_dst_profile = *dstProfile;
2326         skcms_SetXYZD50(&gray_dst_profile, &skcms_XYZD50_profile()->toXYZD50);
2327         dstProfile = &gray_dst_profile;
2328     }
2329 
2330     if (srcProfile->data_color_space == skcms_Signature_CMYK) {
2331         // Photoshop creates CMYK images as inverse CMYK.
2332         // These happen to be the only ones we've _ever_ seen.
2333         *ops++ = Op_invert;
2334         // With CMYK, ignore the alpha type, to avoid changing K or conflating CMY with K.
2335         srcAlpha = skcms_AlphaFormat_Unpremul;
2336     }
2337 
2338     if (srcAlpha == skcms_AlphaFormat_Opaque) {
2339         *ops++ = Op_force_opaque;
2340     } else if (srcAlpha == skcms_AlphaFormat_PremulAsEncoded) {
2341         *ops++ = Op_unpremul;
2342     }
2343 
2344     if (dstProfile != srcProfile) {
2345 
2346         if (!prep_for_destination(dstProfile,
2347                                   &from_xyz,
2348                                   &dst_curves[0].parametric,
2349                                   &dst_curves[1].parametric,
2350                                   &dst_curves[2].parametric)) {
2351             return false;
2352         }
2353 
2354         if (srcProfile->has_A2B) {
2355             if (srcProfile->A2B.input_channels) {
2356                 for (int i = 0; i < (int)srcProfile->A2B.input_channels; i++) {
2357                     OpAndArg oa = select_curve_op(&srcProfile->A2B.input_curves[i], i);
2358                     if (oa.arg) {
2359                         *ops++  = oa.op;
2360                         *args++ = oa.arg;
2361                     }
2362                 }
2363                 *ops++ = Op_clamp;
2364                 *ops++ = Op_clut;
2365                 *args++ = &srcProfile->A2B;
2366             }
2367 
2368             if (srcProfile->A2B.matrix_channels == 3) {
2369                 for (int i = 0; i < 3; i++) {
2370                     OpAndArg oa = select_curve_op(&srcProfile->A2B.matrix_curves[i], i);
2371                     if (oa.arg) {
2372                         *ops++  = oa.op;
2373                         *args++ = oa.arg;
2374                     }
2375                 }
2376 
2377                 static const skcms_Matrix3x4 I = {{
2378                     {1,0,0,0},
2379                     {0,1,0,0},
2380                     {0,0,1,0},
2381                 }};
2382                 if (0 != memcmp(&I, &srcProfile->A2B.matrix, sizeof(I))) {
2383                     *ops++  = Op_matrix_3x4;
2384                     *args++ = &srcProfile->A2B.matrix;
2385                 }
2386             }
2387 
2388             if (srcProfile->A2B.output_channels == 3) {
2389                 for (int i = 0; i < 3; i++) {
2390                     OpAndArg oa = select_curve_op(&srcProfile->A2B.output_curves[i], i);
2391                     if (oa.arg) {
2392                         *ops++  = oa.op;
2393                         *args++ = oa.arg;
2394                     }
2395                 }
2396             }
2397 
2398             if (srcProfile->pcs == skcms_Signature_Lab) {
2399                 *ops++ = Op_lab_to_xyz;
2400             }
2401 
2402         } else if (srcProfile->has_trc && srcProfile->has_toXYZD50) {
2403             for (int i = 0; i < 3; i++) {
2404                 OpAndArg oa = select_curve_op(&srcProfile->trc[i], i);
2405                 if (oa.arg) {
2406                     *ops++  = oa.op;
2407                     *args++ = oa.arg;
2408                 }
2409             }
2410         } else {
2411             return false;
2412         }
2413 
2414         // A2B sources should already be in XYZD50 at this point.
2415         // Others still need to be transformed using their toXYZD50 matrix.
2416         // N.B. There are profiles that contain both A2B tags and toXYZD50 matrices.
2417         // If we use the A2B tags, we need to ignore the XYZD50 matrix entirely.
2418         assert (srcProfile->has_A2B || srcProfile->has_toXYZD50);
2419         static const skcms_Matrix3x3 I = {{
2420             { 1.0f, 0.0f, 0.0f },
2421             { 0.0f, 1.0f, 0.0f },
2422             { 0.0f, 0.0f, 1.0f },
2423         }};
2424         const skcms_Matrix3x3* to_xyz = srcProfile->has_A2B ? &I : &srcProfile->toXYZD50;
2425 
2426         // There's a chance the source and destination gamuts are identical,
2427         // in which case we can skip the gamut transform.
2428         if (0 != memcmp(&dstProfile->toXYZD50, to_xyz, sizeof(skcms_Matrix3x3))) {
2429             // Concat the entire gamut transform into from_xyz,
2430             // now slightly misnamed but it's a handy spot to stash the result.
2431             from_xyz = skcms_Matrix3x3_concat(&from_xyz, to_xyz);
2432             *ops++  = Op_matrix_3x3;
2433             *args++ = &from_xyz;
2434         }
2435 
2436         // Encode back to dst RGB using its parametric transfer functions.
2437         for (int i = 0; i < 3; i++) {
2438             OpAndArg oa = select_curve_op(dst_curves+i, i);
2439             if (oa.arg) {
2440                 assert (oa.op != Op_table_r &&
2441                         oa.op != Op_table_g &&
2442                         oa.op != Op_table_b &&
2443                         oa.op != Op_table_a);
2444                 *ops++  = oa.op;
2445                 *args++ = oa.arg;
2446             }
2447         }
2448     }
2449 
2450     // Clamp here before premul to make sure we're clamping to normalized values _and_ gamut,
2451     // not just to values that fit in [0,1].
2452     //
2453     // E.g. r = 1.1, a = 0.5 would fit fine in fixed point after premul (ra=0.55,a=0.5),
2454     // but would be carrying r > 1, which is really unexpected for downstream consumers.
2455     if (dstFmt < skcms_PixelFormat_RGB_hhh) {
2456         *ops++ = Op_clamp;
2457     }
2458     if (dstAlpha == skcms_AlphaFormat_Opaque) {
2459         *ops++ = Op_force_opaque;
2460     } else if (dstAlpha == skcms_AlphaFormat_PremulAsEncoded) {
2461         *ops++ = Op_premul;
2462     }
2463     if (dstFmt & 1) {
2464         *ops++ = Op_swap_rb;
2465     }
2466     switch (dstFmt >> 1) {
2467         default: return false;
2468         case skcms_PixelFormat_A_8             >> 1: *ops++ = Op_store_a8;         break;
2469         case skcms_PixelFormat_G_8             >> 1: *ops++ = Op_store_g8;         break;
2470         case skcms_PixelFormat_ABGR_4444       >> 1: *ops++ = Op_store_4444;       break;
2471         case skcms_PixelFormat_RGB_565         >> 1: *ops++ = Op_store_565;        break;
2472         case skcms_PixelFormat_RGB_888         >> 1: *ops++ = Op_store_888;        break;
2473         case skcms_PixelFormat_RGBA_8888       >> 1: *ops++ = Op_store_8888;       break;
2474         case skcms_PixelFormat_RGBA_1010102    >> 1: *ops++ = Op_store_1010102;    break;
2475         case skcms_PixelFormat_RGB_161616LE    >> 1: *ops++ = Op_store_161616LE;   break;
2476         case skcms_PixelFormat_RGBA_16161616LE >> 1: *ops++ = Op_store_16161616LE; break;
2477         case skcms_PixelFormat_RGB_161616BE    >> 1: *ops++ = Op_store_161616BE;   break;
2478         case skcms_PixelFormat_RGBA_16161616BE >> 1: *ops++ = Op_store_16161616BE; break;
2479         case skcms_PixelFormat_RGB_hhh_Norm    >> 1: *ops++ = Op_store_hhh;        break;
2480         case skcms_PixelFormat_RGBA_hhhh_Norm  >> 1: *ops++ = Op_store_hhhh;       break;
2481         case skcms_PixelFormat_RGB_hhh         >> 1: *ops++ = Op_store_hhh;        break;
2482         case skcms_PixelFormat_RGBA_hhhh       >> 1: *ops++ = Op_store_hhhh;       break;
2483         case skcms_PixelFormat_RGB_fff         >> 1: *ops++ = Op_store_fff;        break;
2484         case skcms_PixelFormat_RGBA_ffff       >> 1: *ops++ = Op_store_ffff;       break;
2485     }
2486 
2487     auto run = baseline::run_program;
2488 #if defined(TEST_FOR_HSW)
2489     switch (cpu_type()) {
2490         case CpuType::None:                        break;
2491         case CpuType::HSW: run = hsw::run_program; break;
2492         case CpuType::SKX: run = hsw::run_program; break;
2493     }
2494 #endif
2495 #if defined(TEST_FOR_SKX)
2496     switch (cpu_type()) {
2497         case CpuType::None:                        break;
2498         case CpuType::HSW:                         break;
2499         case CpuType::SKX: run = skx::run_program; break;
2500     }
2501 #endif
2502     run(program, arguments, (const char*)src, (char*)dst, n, src_bpp,dst_bpp);
2503     return true;
2504 }
2505 
assert_usable_as_destination(const skcms_ICCProfile * profile)2506 static void assert_usable_as_destination(const skcms_ICCProfile* profile) {
2507 #if defined(NDEBUG)
2508     (void)profile;
2509 #else
2510     skcms_Matrix3x3 fromXYZD50;
2511     skcms_TransferFunction invR, invG, invB;
2512     assert(prep_for_destination(profile, &fromXYZD50, &invR, &invG, &invB));
2513 #endif
2514 }
2515 
skcms_MakeUsableAsDestination(skcms_ICCProfile * profile)2516 bool skcms_MakeUsableAsDestination(skcms_ICCProfile* profile) {
2517     skcms_Matrix3x3 fromXYZD50;
2518     if (!profile->has_trc || !profile->has_toXYZD50
2519         || !skcms_Matrix3x3_invert(&profile->toXYZD50, &fromXYZD50)) {
2520         return false;
2521     }
2522 
2523     skcms_TransferFunction tf[3];
2524     for (int i = 0; i < 3; i++) {
2525         skcms_TransferFunction inv;
2526         if (profile->trc[i].table_entries == 0
2527             && skcms_TransferFunction_invert(&profile->trc[i].parametric, &inv)) {
2528             tf[i] = profile->trc[i].parametric;
2529             continue;
2530         }
2531 
2532         float max_error;
2533         // Parametric curves from skcms_ApproximateCurve() are guaranteed to be invertible.
2534         if (!skcms_ApproximateCurve(&profile->trc[i], &tf[i], &max_error)) {
2535             return false;
2536         }
2537     }
2538 
2539     for (int i = 0; i < 3; ++i) {
2540         profile->trc[i].table_entries = 0;
2541         profile->trc[i].parametric = tf[i];
2542     }
2543 
2544     assert_usable_as_destination(profile);
2545     return true;
2546 }
2547 
skcms_MakeUsableAsDestinationWithSingleCurve(skcms_ICCProfile * profile)2548 bool skcms_MakeUsableAsDestinationWithSingleCurve(skcms_ICCProfile* profile) {
2549     // Operate on a copy of profile, so we can choose the best TF for the original curves
2550     skcms_ICCProfile result = *profile;
2551     if (!skcms_MakeUsableAsDestination(&result)) {
2552         return false;
2553     }
2554 
2555     int best_tf = 0;
2556     float min_max_error = INFINITY_;
2557     for (int i = 0; i < 3; i++) {
2558         skcms_TransferFunction inv;
2559         if (!skcms_TransferFunction_invert(&result.trc[i].parametric, &inv)) {
2560             return false;
2561         }
2562 
2563         float err = 0;
2564         for (int j = 0; j < 3; ++j) {
2565             err = fmaxf_(err, max_roundtrip_error(&profile->trc[j], &inv));
2566         }
2567         if (min_max_error > err) {
2568             min_max_error = err;
2569             best_tf = i;
2570         }
2571     }
2572 
2573     for (int i = 0; i < 3; i++) {
2574         result.trc[i].parametric = result.trc[best_tf].parametric;
2575     }
2576 
2577     *profile = result;
2578     assert_usable_as_destination(profile);
2579     return true;
2580 }
2581