1 /*
2 Copyright (c) 2003-2010 Sony Pictures Imageworks Inc., et al.
3 All Rights Reserved.
4 
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
7 met:
8 * Redistributions of source code must retain the above copyright
9   notice, this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11   notice, this list of conditions and the following disclaimer in the
12   documentation and/or other materials provided with the distribution.
13 * Neither the name of Sony Pictures Imageworks nor the names of its
14   contributors may be used to endorse or promote products derived from
15   this software without specific prior written permission.
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28 
29 #include <OpenColorIO/OpenColorIO.h>
30 
31 #include "HashUtils.h"
32 #include "Lut3DOp.h"
33 #include "MathUtils.h"
34 
35 #include <cmath>
36 #include <limits>
37 #include <sstream>
38 #include <algorithm>
39 
40 OCIO_NAMESPACE_ENTER
41 {
42     Lut3D::Lut3D()
43     {
44         for(int i=0; i<3; ++i)
45         {
46             from_min[i] = 0.0f;
47             from_max[i] = 1.0f;
48             size[i] = 0;
49         }
50     }
51 
52     Lut3DRcPtr Lut3D::Create()
53     {
54         return Lut3DRcPtr(new Lut3D());
55     }
56 
57     std::string Lut3D::getCacheID() const
58     {
59         AutoMutex lock(m_cacheidMutex);
60 
61         if(lut.empty())
62             throw Exception("Cannot compute cacheID of invalid Lut3D");
63 
64         if(!m_cacheID.empty())
65             return m_cacheID;
66 
67         md5_state_t state;
68         md5_byte_t digest[16];
69 
70         md5_init(&state);
71 
72         md5_append(&state, (const md5_byte_t *)from_min, (int)(3*sizeof(float)));
73         md5_append(&state, (const md5_byte_t *)from_max, (int)(3*sizeof(float)));
74         md5_append(&state, (const md5_byte_t *)size,     (int)(3*sizeof(int)));
75         md5_append(&state, (const md5_byte_t *)&lut[0],  (int)(lut.size()*sizeof(float)));
76 
77         md5_finish(&state, digest);
78 
79         m_cacheID = GetPrintableHash(digest);
80 
81         return m_cacheID;
82     }
83 
84 
85     namespace
86     {
87         // Linear
88         inline float lerp(float a, float b, float z)
89             { return (b - a) * z + a; }
90 
91         inline void lerp_rgb(float* out, float* a, float* b, float* z)
92         {
93             out[0] = (b[0] - a[0]) * z[0] + a[0];
94             out[1] = (b[1] - a[1]) * z[1] + a[1];
95             out[2] = (b[2] - a[2]) * z[2] + a[2];
96         }
97 
98         // Bilinear
99         inline float lerp(float a, float b, float c, float d, float y, float z)
100             { return lerp(lerp(a, b, z), lerp(c, d, z), y); }
101 
102         inline void lerp_rgb(float* out, float* a, float* b, float* c,
103                              float* d, float* y, float* z)
104         {
105             float v1[3];
106             float v2[3];
107 
108             lerp_rgb(v1, a, b, z);
109             lerp_rgb(v2, c, d, z);
110             lerp_rgb(out, v1, v2, y);
111         }
112 
113         // Trilinear
114         inline float lerp(float a, float b, float c, float d,
115                           float e, float f, float g, float h,
116                           float x, float y, float z)
117             { return lerp(lerp(a,b,c,d,y,z), lerp(e,f,g,h,y,z), x); }
118 
119         inline void lerp_rgb(float* out, float* a, float* b, float* c, float* d,
120                              float* e, float* f, float* g, float* h,
121                              float* x, float* y, float* z)
122         {
123             float v1[3];
124             float v2[3];
125 
126             lerp_rgb(v1, a,b,c,d,y,z);
127             lerp_rgb(v2, e,f,g,h,y,z);
128             lerp_rgb(out, v1, v2, x);
129         }
130 
131         inline float lookupNearest_3D(int rIndex, int gIndex, int bIndex,
132                                       int size_red, int size_green, int size_blue,
133                                       const float* simple_rgb_lut, int channelIndex)
134         {
135             return simple_rgb_lut[ GetLut3DIndex_B(rIndex, gIndex, bIndex,
136                 size_red, size_green, size_blue) + channelIndex];
137         }
138 
139         inline void lookupNearest_3D_rgb(float* rgb,
140                                          int rIndex, int gIndex, int bIndex,
141                                          int size_red, int size_green, int size_blue,
142                                          const float* simple_rgb_lut)
143         {
144             int offset = GetLut3DIndex_B(rIndex, gIndex, bIndex, size_red, size_green, size_blue);
145             rgb[0] = simple_rgb_lut[offset];
146             rgb[1] = simple_rgb_lut[offset+1];
147             rgb[2] = simple_rgb_lut[offset+2];
148         }
149 
150         // Note: This function assumes that minVal is less than maxVal
151         inline int clamp(float k, float minVal, float maxVal)
152         {
153             return static_cast<int>(roundf(std::max(std::min(k, maxVal), minVal)));
154         }
155 
156         ///////////////////////////////////////////////////////////////////////
157         // Nearest Forward
158 
159         void Lut3D_Nearest(float* rgbaBuffer, long numPixels, const Lut3D & lut)
160         {
161             float maxIndex[3];
162             float mInv[3];
163             float b[3];
164             float mInv_x_maxIndex[3];
165             int lutSize[3];
166             const float* startPos = &(lut.lut[0]);
167 
168             for(int i=0; i<3; ++i)
169             {
170                 maxIndex[i] = (float) (lut.size[i] - 1);
171                 mInv[i] = 1.0f / (lut.from_max[i] - lut.from_min[i]);
172                 b[i] = lut.from_min[i];
173                 mInv_x_maxIndex[i] = (float) (mInv[i] * maxIndex[i]);
174 
175                 lutSize[i] = lut.size[i];
176             }
177 
178             int localIndex[3];
179 
180             for(long pixelIndex=0; pixelIndex<numPixels; ++pixelIndex)
181             {
182                 if(isnan(rgbaBuffer[0]) || isnan(rgbaBuffer[1]) || isnan(rgbaBuffer[2]))
183                 {
184                     rgbaBuffer[0] = std::numeric_limits<float>::quiet_NaN();
185                     rgbaBuffer[1] = std::numeric_limits<float>::quiet_NaN();
186                     rgbaBuffer[2] = std::numeric_limits<float>::quiet_NaN();
187                 }
188                 else
189                 {
190                     localIndex[0] = clamp(mInv_x_maxIndex[0] * (rgbaBuffer[0] - b[0]), 0.0f, maxIndex[0]);
191                     localIndex[1] = clamp(mInv_x_maxIndex[1] * (rgbaBuffer[1] - b[1]), 0.0f, maxIndex[1]);
192                     localIndex[2] = clamp(mInv_x_maxIndex[2] * (rgbaBuffer[2] - b[2]), 0.0f, maxIndex[2]);
193 
194                     lookupNearest_3D_rgb(rgbaBuffer, localIndex[0], localIndex[1], localIndex[2],
195                                                      lutSize[0], lutSize[1], lutSize[2], startPos);
196                 }
197 
198                 rgbaBuffer += 4;
199             }
200         }
201 
202         ///////////////////////////////////////////////////////////////////////
203         // Linear Forward
204 
205         void Lut3D_Linear(float* rgbaBuffer, long numPixels, const Lut3D & lut)
206         {
207             float maxIndex[3];
208             float mInv[3];
209             float b[3];
210             float mInv_x_maxIndex[3];
211             int lutSize[3];
212             const float* startPos = &(lut.lut[0]);
213 
214             for(int i=0; i<3; ++i)
215             {
216                 maxIndex[i] = (float) (lut.size[i] - 1);
217                 mInv[i] = 1.0f / (lut.from_max[i] - lut.from_min[i]);
218                 b[i] = lut.from_min[i];
219                 mInv_x_maxIndex[i] = (float) (mInv[i] * maxIndex[i]);
220 
221                 lutSize[i] = lut.size[i];
222             }
223 
224             for(long pixelIndex=0; pixelIndex<numPixels; ++pixelIndex)
225             {
226 
227                 if(isnan(rgbaBuffer[0]) || isnan(rgbaBuffer[1]) || isnan(rgbaBuffer[2]))
228                 {
229                     rgbaBuffer[0] = std::numeric_limits<float>::quiet_NaN();
230                     rgbaBuffer[1] = std::numeric_limits<float>::quiet_NaN();
231                     rgbaBuffer[2] = std::numeric_limits<float>::quiet_NaN();
232                 }
233                 else
234                 {
235                     float localIndex[3];
236                     int indexLow[3];
237                     int indexHigh[3];
238                     float delta[3];
239                     float a[3];
240                     float b_[3];
241                     float c[3];
242                     float d[3];
243                     float e[3];
244                     float f[3];
245                     float g[3];
246                     float h[4];
247                     float x[4];
248                     float y[4];
249                     float z[4];
250 
251                     localIndex[0] = std::max(std::min(mInv_x_maxIndex[0] * (rgbaBuffer[0] - b[0]), maxIndex[0]), 0.0f);
252                     localIndex[1] = std::max(std::min(mInv_x_maxIndex[1] * (rgbaBuffer[1] - b[1]), maxIndex[1]), 0.0f);
253                     localIndex[2] = std::max(std::min(mInv_x_maxIndex[2] * (rgbaBuffer[2] - b[2]), maxIndex[2]), 0.0f);
254 
255                     indexLow[0] =  static_cast<int>(std::floor(localIndex[0]));
256                     indexLow[1] =  static_cast<int>(std::floor(localIndex[1]));
257                     indexLow[2] =  static_cast<int>(std::floor(localIndex[2]));
258 
259                     indexHigh[0] =  static_cast<int>(std::ceil(localIndex[0]));
260                     indexHigh[1] =  static_cast<int>(std::ceil(localIndex[1]));
261                     indexHigh[2] =  static_cast<int>(std::ceil(localIndex[2]));
262 
263                     delta[0] = localIndex[0] - static_cast<float>(indexLow[0]);
264                     delta[1] = localIndex[1] - static_cast<float>(indexLow[1]);
265                     delta[2] = localIndex[2] - static_cast<float>(indexLow[2]);
266 
267                     // Lookup 8 corners of cube
268                     lookupNearest_3D_rgb(a, indexLow[0],  indexLow[1],  indexLow[2],  lutSize[0], lutSize[1], lutSize[2], startPos);
269                     lookupNearest_3D_rgb(b_, indexLow[0],  indexLow[1],  indexHigh[2], lutSize[0], lutSize[1], lutSize[2], startPos);
270                     lookupNearest_3D_rgb(c, indexLow[0],  indexHigh[1], indexLow[2],  lutSize[0], lutSize[1], lutSize[2], startPos);
271                     lookupNearest_3D_rgb(d, indexLow[0],  indexHigh[1], indexHigh[2], lutSize[0], lutSize[1], lutSize[2], startPos);
272                     lookupNearest_3D_rgb(e, indexHigh[0], indexLow[1],  indexLow[2],  lutSize[0], lutSize[1], lutSize[2], startPos);
273                     lookupNearest_3D_rgb(f, indexHigh[0], indexLow[1],  indexHigh[2], lutSize[0], lutSize[1], lutSize[2], startPos);
274                     lookupNearest_3D_rgb(g, indexHigh[0], indexHigh[1], indexLow[2],  lutSize[0], lutSize[1], lutSize[2], startPos);
275                     lookupNearest_3D_rgb(h, indexHigh[0], indexHigh[1], indexHigh[2], lutSize[0], lutSize[1], lutSize[2], startPos);
276 
277                     // Also store the 3d interpolation coordinates
278                     x[0] = delta[0]; x[1] = delta[0]; x[2] = delta[0];
279                     y[0] = delta[1]; y[1] = delta[1]; y[2] = delta[1];
280                     z[0] = delta[2]; z[1] = delta[2]; z[2] = delta[2];
281 
282                     // Do a trilinear interpolation of the 8 corners
283                     // 4726.8 scanlines/sec
284 
285                     lerp_rgb(rgbaBuffer, a, b_, c, d, e, f, g, h,
286                                          x, y, z);
287                 }
288 
289                 rgbaBuffer += 4;
290             }
291         }
292     }
293 
294 
295     void Lut3D_Tetrahedral(float* rgbaBuffer, long numPixels, const Lut3D & lut)
296     {
297         // Tetrahedral interoplation, as described by:
298         // http://www.filmlight.ltd.uk/pdf/whitepapers/FL-TL-TN-0057-SoftwareLib.pdf
299         // http://blogs.mathworks.com/steve/2006/11/24/tetrahedral-interpolation-for-colorspace-conversion/
300         // http://www.hpl.hp.com/techreports/98/HPL-98-95.html
301 
302         float maxIndex[3];
303         float mInv[3];
304         float b[3];
305         float mInv_x_maxIndex[3];
306         int lutSize[3];
307         const float* startPos = &(lut.lut[0]);
308 
309         for(int i=0; i<3; ++i)
310         {
311             maxIndex[i] = (float) (lut.size[i] - 1);
312             mInv[i] = 1.0f / (lut.from_max[i] - lut.from_min[i]);
313             b[i] = lut.from_min[i];
314             mInv_x_maxIndex[i] = (float) (mInv[i] * maxIndex[i]);
315 
316             lutSize[i] = lut.size[i];
317         }
318 
319         for(long pixelIndex=0; pixelIndex<numPixels; ++pixelIndex)
320         {
321 
322             if(isnan(rgbaBuffer[0]) || isnan(rgbaBuffer[1]) || isnan(rgbaBuffer[2]))
323             {
324                 rgbaBuffer[0] = std::numeric_limits<float>::quiet_NaN();
325                 rgbaBuffer[1] = std::numeric_limits<float>::quiet_NaN();
326                 rgbaBuffer[2] = std::numeric_limits<float>::quiet_NaN();
327             }
328             else
329             {
330                 float localIndex[3];
331                 int indexLow[3];
332                 int indexHigh[3];
333                 float delta[3];
334 
335                 // Same index/delta calculation as linear interpolation
336                 localIndex[0] = std::max(std::min(mInv_x_maxIndex[0] * (rgbaBuffer[0] - b[0]), maxIndex[0]), 0.0f);
337                 localIndex[1] = std::max(std::min(mInv_x_maxIndex[1] * (rgbaBuffer[1] - b[1]), maxIndex[1]), 0.0f);
338                 localIndex[2] = std::max(std::min(mInv_x_maxIndex[2] * (rgbaBuffer[2] - b[2]), maxIndex[2]), 0.0f);
339 
340                 indexLow[0] =  static_cast<int>(std::floor(localIndex[0]));
341                 indexLow[1] =  static_cast<int>(std::floor(localIndex[1]));
342                 indexLow[2] =  static_cast<int>(std::floor(localIndex[2]));
343 
344                 indexHigh[0] =  static_cast<int>(std::ceil(localIndex[0]));
345                 indexHigh[1] =  static_cast<int>(std::ceil(localIndex[1]));
346                 indexHigh[2] =  static_cast<int>(std::ceil(localIndex[2]));
347 
348                 delta[0] = localIndex[0] - static_cast<float>(indexLow[0]);
349                 delta[1] = localIndex[1] - static_cast<float>(indexLow[1]);
350                 delta[2] = localIndex[2] - static_cast<float>(indexLow[2]);
351 
352                 // Rebind for consistency with Truelight paper
353                 float fx = delta[0];
354                 float fy = delta[1];
355                 float fz = delta[2];
356 
357                 // Compute index into LUT for surrounding corners
358                 const int n000 = GetLut3DIndex_B(indexLow[0], indexLow[1], indexLow[2],
359                                                  lutSize[0], lutSize[1], lutSize[2]);
360                 const int n100 = GetLut3DIndex_B(indexHigh[0], indexLow[1], indexLow[2],
361                                                  lutSize[0], lutSize[1], lutSize[2]);
362                 const int n010 = GetLut3DIndex_B(indexLow[0], indexHigh[1], indexLow[2],
363                                                  lutSize[0], lutSize[1], lutSize[2]);
364                 const int n001 = GetLut3DIndex_B(indexLow[0], indexLow[1], indexHigh[2],
365                                                  lutSize[0], lutSize[1], lutSize[2]);
366                 const int n110 = GetLut3DIndex_B(indexHigh[0], indexHigh[1], indexLow[2],
367                                                  lutSize[0], lutSize[1], lutSize[2]);
368                 const int n101 = GetLut3DIndex_B(indexHigh[0], indexLow[1], indexHigh[2],
369                                                  lutSize[0], lutSize[1], lutSize[2]);
370                 const int n011 = GetLut3DIndex_B(indexLow[0], indexHigh[1], indexHigh[2],
371                                                  lutSize[0], lutSize[1], lutSize[2]);
372                 const int n111 = GetLut3DIndex_B(indexHigh[0], indexHigh[1], indexHigh[2],
373                                                  lutSize[0], lutSize[1], lutSize[2]);
374 
375                 if (fx > fy) {
376                     if (fy > fz) {
377                        rgbaBuffer[0] =
378                            (1-fx)  * startPos[n000] +
379                            (fx-fy) * startPos[n100] +
380                            (fy-fz) * startPos[n110] +
381                            (fz)    * startPos[n111];
382 
383                        rgbaBuffer[1] =
384                            (1-fx)  * startPos[n000+1] +
385                            (fx-fy) * startPos[n100+1] +
386                            (fy-fz) * startPos[n110+1] +
387                            (fz)    * startPos[n111+1];
388 
389                        rgbaBuffer[2] =
390                            (1-fx)  * startPos[n000+2] +
391                            (fx-fy) * startPos[n100+2] +
392                            (fy-fz) * startPos[n110+2] +
393                            (fz)    * startPos[n111+2];
394                     }
395                     else if (fx > fz)
396                     {
397                         rgbaBuffer[0] =
398                             (1-fx)  * startPos[n000] +
399                             (fx-fz) * startPos[n100] +
400                             (fz-fy) * startPos[n101] +
401                             (fy)    * startPos[n111];
402 
403                         rgbaBuffer[1] =
404                             (1-fx)  * startPos[n000+1] +
405                             (fx-fz) * startPos[n100+1] +
406                             (fz-fy) * startPos[n101+1] +
407                             (fy)    * startPos[n111+1];
408 
409                         rgbaBuffer[2] =
410                             (1-fx)  * startPos[n000+2] +
411                             (fx-fz) * startPos[n100+2] +
412                             (fz-fy) * startPos[n101+2] +
413                             (fy)    * startPos[n111+2];
414                     }
415                     else
416                     {
417                         rgbaBuffer[0] =
418                             (1-fz)  * startPos[n000] +
419                             (fz-fx) * startPos[n001] +
420                             (fx-fy) * startPos[n101] +
421                             (fy)    * startPos[n111];
422 
423                         rgbaBuffer[1] =
424                             (1-fz)  * startPos[n000+1] +
425                             (fz-fx) * startPos[n001+1] +
426                             (fx-fy) * startPos[n101+1] +
427                             (fy)    * startPos[n111+1];
428 
429                         rgbaBuffer[2] =
430                             (1-fz)  * startPos[n000+2] +
431                             (fz-fx) * startPos[n001+2] +
432                             (fx-fy) * startPos[n101+2] +
433                             (fy)    * startPos[n111+2];
434                     }
435                 }
436                 else
437                 {
438                     if (fz > fy)
439                     {
440                         rgbaBuffer[0] =
441                             (1-fz)  * startPos[n000] +
442                             (fz-fy) * startPos[n001] +
443                             (fy-fx) * startPos[n011] +
444                             (fx)    * startPos[n111];
445 
446                         rgbaBuffer[1] =
447                             (1-fz)  * startPos[n000+1] +
448                             (fz-fy) * startPos[n001+1] +
449                             (fy-fx) * startPos[n011+1] +
450                             (fx)    * startPos[n111+1];
451 
452                         rgbaBuffer[2] =
453                             (1-fz)  * startPos[n000+2] +
454                             (fz-fy) * startPos[n001+2] +
455                             (fy-fx) * startPos[n011+2] +
456                             (fx)    * startPos[n111+2];
457                     }
458                     else if (fz > fx)
459                     {
460                         rgbaBuffer[0] =
461                             (1-fy)  * startPos[n000] +
462                             (fy-fz) * startPos[n010] +
463                             (fz-fx) * startPos[n011] +
464                             (fx)    * startPos[n111];
465 
466                         rgbaBuffer[1] =
467                             (1-fy)  * startPos[n000+1] +
468                             (fy-fz) * startPos[n010+1] +
469                             (fz-fx) * startPos[n011+1] +
470                             (fx)    * startPos[n111+1];
471 
472                         rgbaBuffer[2] =
473                             (1-fy)  * startPos[n000+2] +
474                             (fy-fz) * startPos[n010+2] +
475                             (fz-fx) * startPos[n011+2] +
476                             (fx)    * startPos[n111+2];
477                     }
478                     else
479                     {
480                         rgbaBuffer[0] =
481                             (1-fy)  * startPos[n000] +
482                             (fy-fx) * startPos[n010] +
483                             (fx-fz) * startPos[n110] +
484                             (fz)    * startPos[n111];
485 
486                         rgbaBuffer[1] =
487                             (1-fy)  * startPos[n000+1] +
488                             (fy-fx) * startPos[n010+1] +
489                             (fx-fz) * startPos[n110+1] +
490                             (fz)    * startPos[n111+1];
491 
492                         rgbaBuffer[2] =
493                             (1-fy)  * startPos[n000+2] +
494                             (fy-fx) * startPos[n010+2] +
495                             (fx-fz) * startPos[n110+2] +
496                             (fz)    * startPos[n111+2];
497                     }
498                 }
499             } // !isnan
500 
501             rgbaBuffer += 4;
502         }
503     }
504 
505 
506     void GenerateIdentityLut3D(float* img, int edgeLen, int numChannels, Lut3DOrder lut3DOrder)
507     {
508         if(!img) return;
509         if(numChannels < 3)
510         {
511             throw Exception("Cannot generate idenitity 3d lut with less than 3 channels.");
512         }
513 
514         float c = 1.0f / ((float)edgeLen - 1.0f);
515 
516         if(lut3DOrder == LUT3DORDER_FAST_RED)
517         {
518             for(int i=0; i<edgeLen*edgeLen*edgeLen; i++)
519             {
520                 img[numChannels*i+0] = (float)(i%edgeLen) * c;
521                 img[numChannels*i+1] = (float)((i/edgeLen)%edgeLen) * c;
522                 img[numChannels*i+2] = (float)((i/edgeLen/edgeLen)%edgeLen) * c;
523             }
524         }
525         else if(lut3DOrder == LUT3DORDER_FAST_BLUE)
526         {
527             for(int i=0; i<edgeLen*edgeLen*edgeLen; i++)
528             {
529                 img[numChannels*i+0] = (float)((i/edgeLen/edgeLen)%edgeLen) * c;
530                 img[numChannels*i+1] = (float)((i/edgeLen)%edgeLen) * c;
531                 img[numChannels*i+2] = (float)(i%edgeLen) * c;
532             }
533         }
534         else
535         {
536             throw Exception("Unknown Lut3DOrder.");
537         }
538     }
539 
540 
541     int Get3DLutEdgeLenFromNumPixels(int numPixels)
542     {
543         int dim = static_cast<int>(roundf(powf((float) numPixels, 1.0f/3.0f)));
544 
545         if(dim*dim*dim != numPixels)
546         {
547             std::ostringstream os;
548             os << "Cannot infer 3D Lut size. ";
549             os << numPixels << " element(s) does not correspond to a ";
550             os << "unform cube edge length. (nearest edge length is ";
551             os << dim << ").";
552             throw Exception(os.str().c_str());
553         }
554 
555         return dim;
556     }
557 
558     namespace
559     {
560         class Lut3DOp : public Op
561         {
562         public:
563             Lut3DOp(Lut3DRcPtr lut,
564                     Interpolation interpolation,
565                     TransformDirection direction);
566             virtual ~Lut3DOp();
567 
568             virtual OpRcPtr clone() const;
569 
570             virtual std::string getInfo() const;
571             virtual std::string getCacheID() const;
572 
573             virtual bool isNoOp() const;
574             virtual bool isSameType(const OpRcPtr & op) const;
575             virtual bool isInverse(const OpRcPtr & op) const;
576             virtual bool hasChannelCrosstalk() const;
577             virtual void finalize();
578             virtual void apply(float* rgbaBuffer, long numPixels) const;
579 
580             virtual bool supportsGpuShader() const;
581             virtual void writeGpuShader(std::ostream & shader,
582                                         const std::string & pixelName,
583                                         const GpuShaderDesc & shaderDesc) const;
584 
585         private:
586             Lut3DRcPtr m_lut;
587             Interpolation m_interpolation;
588             TransformDirection m_direction;
589 
590             // Set in finalize
591             std::string m_cacheID;
592         };
593 
594         typedef OCIO_SHARED_PTR<Lut3DOp> Lut3DOpRcPtr;
595 
596 
597         Lut3DOp::Lut3DOp(Lut3DRcPtr lut,
598                          Interpolation interpolation,
599                          TransformDirection direction):
600                             Op(),
601                             m_lut(lut),
602                             m_interpolation(interpolation),
603                             m_direction(direction)
604         {
605         }
606 
607         OpRcPtr Lut3DOp::clone() const
608         {
609             OpRcPtr op = OpRcPtr(new Lut3DOp(m_lut, m_interpolation, m_direction));
610             return op;
611         }
612 
613         Lut3DOp::~Lut3DOp()
614         { }
615 
616         std::string Lut3DOp::getInfo() const
617         {
618             return "<Lut3DOp>";
619         }
620 
621         std::string Lut3DOp::getCacheID() const
622         {
623             return m_cacheID;
624         }
625 
626         // TODO: compute real value for isNoOp
627         bool Lut3DOp::isNoOp() const
628         {
629             return false;
630         }
631 
632         bool Lut3DOp::isSameType(const OpRcPtr & op) const
633         {
634             Lut3DOpRcPtr typedRcPtr = DynamicPtrCast<Lut3DOp>(op);
635             if(!typedRcPtr) return false;
636             return true;
637         }
638 
639         bool Lut3DOp::isInverse(const OpRcPtr & op) const
640         {
641             Lut3DOpRcPtr typedRcPtr = DynamicPtrCast<Lut3DOp>(op);
642             if(!typedRcPtr) return false;
643 
644             if(GetInverseTransformDirection(m_direction) != typedRcPtr->m_direction)
645                 return false;
646 
647             return (m_lut->getCacheID() == typedRcPtr->m_lut->getCacheID());
648         }
649 
650         // TODO: compute real value for hasChannelCrosstalk
651         bool Lut3DOp::hasChannelCrosstalk() const
652         {
653             return true;
654         }
655 
656         void Lut3DOp::finalize()
657         {
658             if(m_direction != TRANSFORM_DIR_FORWARD)
659             {
660                 std::ostringstream os;
661                 os << "3D Luts can only be applied in the forward direction. ";
662                 os << "(" << TransformDirectionToString(m_direction) << ")";
663                 os << " specified.";
664                 throw Exception(os.str().c_str());
665             }
666 
667             // Validate the requested interpolation type
668             switch(m_interpolation)
669             {
670                  // These are the allowed values.
671                 case INTERP_NEAREST:
672                 case INTERP_LINEAR:
673                 case INTERP_TETRAHEDRAL:
674                     break;
675                 case INTERP_BEST:
676                     m_interpolation = INTERP_LINEAR;
677                     break;
678                 case INTERP_UNKNOWN:
679                     throw Exception("Cannot apply Lut3DOp, unspecified interpolation.");
680                     break;
681                 default:
682                     throw Exception("Cannot apply Lut3DOp, invalid interpolation specified.");
683             }
684 
685             for(int i=0; i<3; ++i)
686             {
687                 if(m_lut->size[i] == 0)
688                 {
689                     throw Exception("Cannot apply Lut3DOp, lut object is empty.");
690                 }
691                 // TODO if from_min[i] == from_max[i]
692             }
693 
694             if(m_lut->size[0]*m_lut->size[1]*m_lut->size[2] * 3 != (int)m_lut->lut.size())
695             {
696                 throw Exception("Cannot apply Lut3DOp, specified size does not match data.");
697             }
698 
699             // Create the cacheID
700             std::ostringstream cacheIDStream;
701             cacheIDStream << "<Lut3DOp ";
702             cacheIDStream << m_lut->getCacheID() << " ";
703             cacheIDStream << InterpolationToString(m_interpolation) << " ";
704             cacheIDStream << TransformDirectionToString(m_direction) << " ";
705             cacheIDStream << ">";
706             m_cacheID = cacheIDStream.str();
707         }
708 
709         void Lut3DOp::apply(float* rgbaBuffer, long numPixels) const
710         {
711             if(m_interpolation == INTERP_NEAREST)
712             {
713                 Lut3D_Nearest(rgbaBuffer, numPixels, *m_lut);
714             }
715             else if(m_interpolation == INTERP_LINEAR)
716             {
717                 Lut3D_Linear(rgbaBuffer, numPixels, *m_lut);
718             }
719             else if(m_interpolation == INTERP_TETRAHEDRAL)
720             {
721                 Lut3D_Tetrahedral(rgbaBuffer, numPixels, *m_lut);
722             }
723         }
724 
725         bool Lut3DOp::supportsGpuShader() const
726         {
727             return false;
728         }
729 
730         void Lut3DOp::writeGpuShader(std::ostream & /*shader*/,
731                                      const std::string & /*pixelName*/,
732                                      const GpuShaderDesc & /*shaderDesc*/) const
733         {
734             throw Exception("Lut3DOp does not support analytical shader generation.");
735         }
736     }
737 
738     void CreateLut3DOp(OpRcPtrVec & ops,
739                        Lut3DRcPtr lut,
740                        Interpolation interpolation,
741                        TransformDirection direction)
742     {
743         ops.push_back( Lut3DOpRcPtr(new Lut3DOp(lut, interpolation, direction)) );
744     }
745 }
746 OCIO_NAMESPACE_EXIT
747 
748 ///////////////////////////////////////////////////////////////////////////////
749 
750 #ifdef OCIO_UNIT_TEST
751 
752 #include <cstring>
753 #include <cstdlib>
754 #ifndef WIN32
755 #include <sys/time.h>
756 #endif
757 
758 namespace OCIO = OCIO_NAMESPACE;
759 #include "UnitTest.h"
760 
OIIO_ADD_TEST(Lut3DOp,NanInfValueCheck)761 OIIO_ADD_TEST(Lut3DOp, NanInfValueCheck)
762 {
763     OCIO::Lut3DRcPtr lut = OCIO::Lut3D::Create();
764 
765     lut->from_min[0] = 0.0f;
766     lut->from_min[1] = 0.0f;
767     lut->from_min[2] = 0.0f;
768 
769     lut->from_max[0] = 1.0f;
770     lut->from_max[1] = 1.0f;
771     lut->from_max[2] = 1.0f;
772 
773     lut->size[0] = 3;
774     lut->size[1] = 3;
775     lut->size[2] = 3;
776 
777     lut->lut.resize(lut->size[0]*lut->size[1]*lut->size[2]*3);
778 
779     GenerateIdentityLut3D(&lut->lut[0], lut->size[0], 3, OCIO::LUT3DORDER_FAST_RED);
780     for(unsigned int i=0; i<lut->lut.size(); ++i)
781     {
782         lut->lut[i] = powf(lut->lut[i], 2.0f);
783     }
784 
785     const float reference[4] = {  std::numeric_limits<float>::signaling_NaN(),
786                                   std::numeric_limits<float>::quiet_NaN(),
787                                   std::numeric_limits<float>::infinity(),
788                                   -std::numeric_limits<float>::infinity() };
789     float color[4];
790 
791     memcpy(color, reference, 4*sizeof(float));
792     OCIO::Lut3D_Nearest(color, 1, *lut);
793 
794     memcpy(color, reference, 4*sizeof(float));
795     OCIO::Lut3D_Linear(color, 1, *lut);
796 }
797 
798 
OIIO_ADD_TEST(Lut3DOp,ValueCheck)799 OIIO_ADD_TEST(Lut3DOp, ValueCheck)
800 {
801     OCIO::Lut3DRcPtr lut = OCIO::Lut3D::Create();
802 
803     lut->from_min[0] = 0.0f;
804     lut->from_min[1] = 0.0f;
805     lut->from_min[2] = 0.0f;
806 
807     lut->from_max[0] = 1.0f;
808     lut->from_max[1] = 1.0f;
809     lut->from_max[2] = 1.0f;
810 
811     lut->size[0] = 32;
812     lut->size[1] = 32;
813     lut->size[2] = 32;
814 
815     lut->lut.resize(lut->size[0]*lut->size[1]*lut->size[2]*3);
816     GenerateIdentityLut3D(&lut->lut[0], lut->size[0], 3, OCIO::LUT3DORDER_FAST_RED);
817     for(unsigned int i=0; i<lut->lut.size(); ++i)
818     {
819         lut->lut[i] = powf(lut->lut[i], 2.0f);
820     }
821 
822     const float reference[] = {  0.0f, 0.2f, 0.3f, 1.0f,
823                                  0.1234f, 0.4567f, 0.9876f, 1.0f,
824                                  11.0f, -0.5f, 0.5010f, 1.0f
825                                 };
826     const float nearest[] = { 0.0f, 0.03746097535f, 0.0842871964f, 1.0f,
827                               0.01664932258f, 0.2039542049f, 1.0f, 1.0f,
828                               1.0f, 0.0f, 0.2663891613f, 1.0f
829                             };
830     const float linear[] = { 0.0f, 0.04016649351f, 0.09021852165f, 1.0f,
831                               0.01537752338f, 0.2087130845f, 0.9756000042f, 1.0f,
832                               1.0f, 0.0f, 0.2512601018f, 1.0f
833                             };
834     float color[12];
835 
836     // Check nearest
837     memcpy(color, reference, 12*sizeof(float));
838     OCIO::Lut3D_Nearest(color, 3, *lut);
839     for(unsigned int i=0; i<12; ++i)
840     {
841         OIIO_CHECK_CLOSE(color[i], nearest[i], 1e-8);
842     }
843 
844     // Check linear
845     memcpy(color, reference, 12*sizeof(float));
846     OCIO::Lut3D_Linear(color, 3, *lut);
847     for(unsigned int i=0; i<12; ++i)
848     {
849         OIIO_CHECK_CLOSE(color[i], linear[i], 1e-8);
850     }
851 
852     // Check tetrahedral
853     memcpy(color, reference, 12*sizeof(float));
854     OCIO::Lut3D_Tetrahedral(color, 3, *lut);
855     for(unsigned int i=0; i<12; ++i)
856     {
857         OIIO_CHECK_CLOSE(color[i], linear[i], 1e-7); // Note, max delta lowered from 1e-8
858     }
859 }
860 
861 
862 
OIIO_ADD_TEST(Lut3DOp,InverseComparisonCheck)863 OIIO_ADD_TEST(Lut3DOp, InverseComparisonCheck)
864 {
865     OCIO::Lut3DRcPtr lut_a = OCIO::Lut3D::Create();
866     lut_a->from_min[0] = 0.0f;
867     lut_a->from_min[1] = 0.0f;
868     lut_a->from_min[2] = 0.0f;
869     lut_a->from_max[0] = 1.0f;
870     lut_a->from_max[1] = 1.0f;
871     lut_a->from_max[2] = 1.0f;
872     lut_a->size[0] = 32;
873     lut_a->size[1] = 32;
874     lut_a->size[2] = 32;
875     lut_a->lut.resize(lut_a->size[0]*lut_a->size[1]*lut_a->size[2]*3);
876     GenerateIdentityLut3D(&lut_a->lut[0], lut_a->size[0], 3, OCIO::LUT3DORDER_FAST_RED);
877 
878     OCIO::Lut3DRcPtr lut_b = OCIO::Lut3D::Create();
879     lut_b->from_min[0] = 0.5f;
880     lut_b->from_min[1] = 0.5f;
881     lut_b->from_min[2] = 0.5f;
882     lut_b->from_max[0] = 1.0f;
883     lut_b->from_max[1] = 1.0f;
884     lut_b->from_max[2] = 1.0f;
885     lut_b->size[0] = 32;
886     lut_b->size[1] = 32;
887     lut_b->size[2] = 32;
888     lut_b->lut.resize(lut_b->size[0]*lut_b->size[1]*lut_b->size[2]*3);
889     GenerateIdentityLut3D(&lut_b->lut[0], lut_b->size[0], 3, OCIO::LUT3DORDER_FAST_RED);
890 
891     OCIO::OpRcPtrVec ops;
892     CreateLut3DOp(ops, lut_a, OCIO::INTERP_NEAREST, OCIO::TRANSFORM_DIR_FORWARD);
893     CreateLut3DOp(ops, lut_a, OCIO::INTERP_LINEAR, OCIO::TRANSFORM_DIR_INVERSE);
894     CreateLut3DOp(ops, lut_b, OCIO::INTERP_LINEAR, OCIO::TRANSFORM_DIR_FORWARD);
895     CreateLut3DOp(ops, lut_b, OCIO::INTERP_LINEAR, OCIO::TRANSFORM_DIR_INVERSE);
896 
897     OIIO_CHECK_EQUAL(ops.size(), 4);
898 
899     OIIO_CHECK_ASSERT(ops[0]->isSameType(ops[1]));
900     OIIO_CHECK_ASSERT(ops[0]->isSameType(ops[2]));
901     OIIO_CHECK_ASSERT(ops[0]->isSameType(ops[3]->clone()));
902 
903     OIIO_CHECK_EQUAL( ops[0]->isInverse(ops[1]), true);
904     OIIO_CHECK_EQUAL( ops[0]->isInverse(ops[2]), false);
905     OIIO_CHECK_EQUAL( ops[0]->isInverse(ops[2]), false);
906     OIIO_CHECK_EQUAL( ops[0]->isInverse(ops[3]), false);
907     OIIO_CHECK_EQUAL( ops[2]->isInverse(ops[3]), true);
908 }
909 
910 
OIIO_ADD_TEST(Lut3DOp,PerformanceCheck)911 OIIO_ADD_TEST(Lut3DOp, PerformanceCheck)
912 {
913     /*
914     OCIO::Lut3D lut;
915 
916     lut.from_min[0] = 0.0f;
917     lut.from_min[1] = 0.0f;
918     lut.from_min[2] = 0.0f;
919 
920     lut.from_max[0] = 1.0f;
921     lut.from_max[1] = 1.0f;
922     lut.from_max[2] = 1.0f;
923 
924     lut.size[0] = 32;
925     lut.size[1] = 32;
926     lut.size[2] = 32;
927 
928     lut.lut.resize(lut.size[0]*lut.size[1]*lut.size[2]*3);
929     GenerateIdentityLut3D(&lut.lut[0], lut.size[0], 3, OCIO::LUT3DORDER_FAST_RED);
930 
931     std::vector<float> img;
932     int xres = 2048;
933     int yres = 1;
934     int channels = 4;
935     img.resize(xres*yres*channels);
936 
937     srand48(0);
938 
939     // create random values from -0.05 to 1.05
940     // (To simulate clipping performance)
941 
942     for(unsigned int i=0; i<img.size(); ++i)
943     {
944         float uniform = (float)drand48();
945         img[i] = uniform*1.1f - 0.05f;
946     }
947 
948     timeval t;
949     gettimeofday(&t, 0);
950     double starttime = (double) t.tv_sec + (double) t.tv_usec / 1000000.0;
951 
952     int numloops = 1024;
953     for(int i=0; i<numloops; ++i)
954     {
955         //OCIO::Lut3D_Nearest(&img[0], xres*yres, lut);
956         OCIO::Lut3D_Linear(&img[0], xres*yres, lut);
957     }
958 
959     gettimeofday(&t, 0);
960     double endtime = (double) t.tv_sec + (double) t.tv_usec / 1000000.0;
961     double totaltime_a = (endtime-starttime)/numloops;
962 
963     printf("Linear: %0.1f ms  - %0.1f fps\n", totaltime_a*1000.0, 1.0/totaltime_a);
964 
965 
966     // Tetrahedral
967     gettimeofday(&t, 0);
968     starttime = (double) t.tv_sec + (double) t.tv_usec / 1000000.0;
969 
970     for(int i=0; i<numloops; ++i)
971     {
972         OCIO::Lut3D_Tetrahedral(&img[0], xres*yres, lut);
973     }
974 
975     gettimeofday(&t, 0);
976     endtime = (double) t.tv_sec + (double) t.tv_usec / 1000000.0;
977     double totaltime_b = (endtime-starttime)/numloops;
978 
979     printf("Tetra: %0.1f ms  - %0.1f fps\n", totaltime_b*1000.0, 1.0/totaltime_b);
980 
981     double speed_diff = totaltime_a/totaltime_b;
982     printf("Tetra is %.04f speed of Linear\n", speed_diff);
983     */
984 }
985 #endif // OCIO_UNIT_TEST
986