1 //---------------------------------------------------------------------------------
2 //
3 //  Little Color Management System, fast floating point extensions
4 //  Copyright (c) 1998-2020 Marti Maria Saguer, all rights reserved
5 //
6 //
7 // This program is free software: you can redistribute it and/or modify
8 // it under the terms of the GNU General Public License as published by
9 // the Free Software Foundation, either version 3 of the License, or
10 // (at your option) any later version.
11 //
12 // This program is distributed in the hope that it will be useful,
13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 // GNU General Public License for more details.
16 //
17 // You should have received a copy of the GNU General Public License
18 // along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 //
20 //---------------------------------------------------------------------------------
21 
22 // Optimization for matrix-shaper in float
23 
24 #include "fast_float_internal.h"
25 
26 
27 // This is the private data container used by this optimization
28 typedef struct {
29 
30 
31     cmsFloat32Number Mat[3][3];
32     cmsFloat32Number Off[3];
33 
34     cmsFloat32Number Shaper1R[MAX_NODES_IN_CURVE];
35     cmsFloat32Number Shaper1G[MAX_NODES_IN_CURVE];
36     cmsFloat32Number Shaper1B[MAX_NODES_IN_CURVE];
37 
38     cmsFloat32Number Shaper2R[MAX_NODES_IN_CURVE];
39     cmsFloat32Number Shaper2G[MAX_NODES_IN_CURVE];
40     cmsFloat32Number Shaper2B[MAX_NODES_IN_CURVE];
41 
42     cmsBool UseOff;
43 
44     void * real_ptr;
45 
46 } VXMatShaperFloatData;
47 
48 
49 static
malloc_aligned(cmsContext ContextID)50 VXMatShaperFloatData* malloc_aligned(cmsContext ContextID)
51 {
52     cmsUInt8Number* real_ptr = (cmsUInt8Number*) _cmsMallocZero(ContextID, sizeof(VXMatShaperFloatData) + 32);
53     cmsUInt8Number* aligned = (cmsUInt8Number*) (((uintptr_t)real_ptr + 16) & ~0xf);
54     VXMatShaperFloatData* p = (VXMatShaperFloatData*) aligned;
55 
56     p ->real_ptr = real_ptr;
57     return p;
58 }
59 
60 
61 
62 // Free the private data container
63 static
FreeMatShaper(cmsContext ContextID,void * Data)64 void  FreeMatShaper(cmsContext ContextID, void* Data)
65 {
66        VXMatShaperFloatData* d = (VXMatShaperFloatData*)Data;
67 
68        if (d != NULL)
69               _cmsFree(ContextID, d->real_ptr);
70 }
71 
72 
73 static
FillShaper(cmsFloat32Number * Table,cmsToneCurve * Curve)74 void FillShaper(cmsFloat32Number* Table, cmsToneCurve* Curve)
75 {
76     int i;
77     cmsFloat32Number R;
78 
79     for (i = 0; i < MAX_NODES_IN_CURVE; i++) {
80 
81            R = (cmsFloat32Number) i / (cmsFloat32Number) (MAX_NODES_IN_CURVE - 1);
82 
83         Table[i] = cmsEvalToneCurveFloat(Curve, R);
84     }
85 }
86 
87 
88 // Compute the matrix-shaper structure
89 static
SetMatShaper(cmsContext ContextID,cmsToneCurve * Curve1[3],cmsMAT3 * Mat,cmsVEC3 * Off,cmsToneCurve * Curve2[3])90 VXMatShaperFloatData* SetMatShaper(cmsContext ContextID, cmsToneCurve* Curve1[3], cmsMAT3* Mat, cmsVEC3* Off, cmsToneCurve* Curve2[3])
91 {
92     VXMatShaperFloatData* p;
93     int i, j;
94 
95     // Allocate a big chuck of memory to store precomputed tables
96     p = malloc_aligned(ContextID);
97     if (p == NULL) return FALSE;
98 
99 
100     // Precompute tables
101     FillShaper(p->Shaper1R, Curve1[0]);
102     FillShaper(p->Shaper1G, Curve1[1]);
103     FillShaper(p->Shaper1B, Curve1[2]);
104 
105     FillShaper(p->Shaper2R, Curve2[0]);
106     FillShaper(p->Shaper2G, Curve2[1]);
107     FillShaper(p->Shaper2B, Curve2[2]);
108 
109 
110     for (i=0; i < 3; i++) {
111         for (j=0; j < 3; j++) {
112                p->Mat[i][j] = (cmsFloat32Number) Mat->v[i].n[j];
113         }
114     }
115 
116 
117     for (i = 0; i < 3; i++) {
118 
119            if (Off == NULL) {
120 
121                   p->UseOff = FALSE;
122                   p->Off[i] = 0.0;
123            }
124            else {
125                   p->UseOff = TRUE;
126                   p->Off[i] = (cmsFloat32Number)Off->n[i];
127 
128            }
129     }
130 
131 
132     return p;
133 }
134 
135 
136 
137 // A fast matrix-shaper evaluator for floating point
138 static
MatShaperFloat(struct _cmstransform_struct * CMMcargo,const void * Input,void * Output,cmsUInt32Number PixelsPerLine,cmsUInt32Number LineCount,const cmsStride * Stride)139 void MatShaperFloat(struct _cmstransform_struct* CMMcargo,
140                         const void* Input,
141                         void* Output,
142                         cmsUInt32Number PixelsPerLine,
143                         cmsUInt32Number LineCount,
144                         const cmsStride* Stride)
145 {
146     VXMatShaperFloatData* p = (VXMatShaperFloatData*) _cmsGetTransformUserData(CMMcargo);
147     cmsFloat32Number l1, l2, l3;
148     cmsFloat32Number r, g, b;
149     cmsUInt32Number i, ii;
150     cmsUInt32Number SourceStartingOrder[cmsMAXCHANNELS];
151     cmsUInt32Number SourceIncrements[cmsMAXCHANNELS];
152     cmsUInt32Number DestStartingOrder[cmsMAXCHANNELS];
153     cmsUInt32Number DestIncrements[cmsMAXCHANNELS];
154 
155     const cmsUInt8Number* rin;
156     const cmsUInt8Number* gin;
157     const cmsUInt8Number* bin;
158     const cmsUInt8Number* ain = NULL;
159 
160     cmsUInt8Number* rout;
161     cmsUInt8Number* gout;
162     cmsUInt8Number* bout;
163     cmsUInt8Number* aout = NULL;
164 
165     cmsUInt32Number nchans, nalpha;
166     cmsUInt32Number strideIn, strideOut;
167 
168     _cmsComputeComponentIncrements(cmsGetTransformInputFormat((cmsHTRANSFORM)CMMcargo), Stride->BytesPerPlaneIn, &nchans, &nalpha, SourceStartingOrder, SourceIncrements);
169     _cmsComputeComponentIncrements(cmsGetTransformOutputFormat((cmsHTRANSFORM)CMMcargo), Stride->BytesPerPlaneOut, &nchans, &nalpha, DestStartingOrder, DestIncrements);
170 
171     if (!(_cmsGetTransformFlags((cmsHTRANSFORM)CMMcargo) & cmsFLAGS_COPY_ALPHA))
172         nalpha = 0;
173 
174     strideIn = strideOut = 0;
175     for (i = 0; i < LineCount; i++) {
176 
177         rin = (const cmsUInt8Number*)Input + SourceStartingOrder[0] + strideIn;
178         gin = (const cmsUInt8Number*)Input + SourceStartingOrder[1] + strideIn;
179         bin = (const cmsUInt8Number*)Input + SourceStartingOrder[2] + strideIn;
180 
181         if (nalpha)
182             ain = (const cmsUInt8Number*)Input + SourceStartingOrder[3] + strideIn;
183 
184         rout = (cmsUInt8Number*)Output + DestStartingOrder[0] + strideOut;
185         gout = (cmsUInt8Number*)Output + DestStartingOrder[1] + strideOut;
186         bout = (cmsUInt8Number*)Output + DestStartingOrder[2] + strideOut;
187 
188         if (nalpha)
189             aout = (cmsUInt8Number*)Output + DestStartingOrder[3] + strideOut;
190 
191         for (ii = 0; ii < PixelsPerLine; ii++) {
192 
193             r = flerp(p->Shaper1R, *(cmsFloat32Number*)rin);
194             g = flerp(p->Shaper1G, *(cmsFloat32Number*)gin);
195             b = flerp(p->Shaper1B, *(cmsFloat32Number*)bin);
196 
197             l1 = p->Mat[0][0] * r + p->Mat[0][1] * g + p->Mat[0][2] * b;
198             l2 = p->Mat[1][0] * r + p->Mat[1][1] * g + p->Mat[1][2] * b;
199             l3 = p->Mat[2][0] * r + p->Mat[2][1] * g + p->Mat[2][2] * b;
200 
201             if (p->UseOff) {
202 
203                 l1 += p->Off[0];
204                 l2 += p->Off[1];
205                 l3 += p->Off[2];
206             }
207 
208             *(cmsFloat32Number*)rout = flerp(p->Shaper2R, l1);
209             *(cmsFloat32Number*)gout = flerp(p->Shaper2G, l2);
210             *(cmsFloat32Number*)bout = flerp(p->Shaper2B, l3);
211 
212             rin += SourceIncrements[0];
213             gin += SourceIncrements[1];
214             bin += SourceIncrements[2];
215 
216             rout += DestIncrements[0];
217             gout += DestIncrements[1];
218             bout += DestIncrements[2];
219 
220             if (ain)
221             {
222                 *(cmsFloat32Number*)aout = *(cmsFloat32Number*)ain;
223                 ain += SourceIncrements[3];
224                 aout += DestIncrements[3];
225             }
226         }
227 
228         strideIn += Stride->BytesPerLineIn;
229         strideOut += Stride->BytesPerLineOut;
230     }
231 }
232 
233 
234 
OptimizeFloatMatrixShaper(_cmsTransform2Fn * TransformFn,void ** UserData,_cmsFreeUserDataFn * FreeUserData,cmsPipeline ** Lut,cmsUInt32Number * InputFormat,cmsUInt32Number * OutputFormat,cmsUInt32Number * dwFlags)235 cmsBool OptimizeFloatMatrixShaper(_cmsTransform2Fn* TransformFn,
236                                   void** UserData,
237                                   _cmsFreeUserDataFn* FreeUserData,
238                                   cmsPipeline** Lut,
239                                   cmsUInt32Number* InputFormat,
240                                   cmsUInt32Number* OutputFormat,
241                                   cmsUInt32Number* dwFlags)
242 {
243     cmsStage* Curve1, *Curve2;
244     cmsStage* Matrix1, *Matrix2;
245     _cmsStageMatrixData* Data1;
246     _cmsStageMatrixData* Data2;
247     cmsMAT3 res;
248     cmsBool IdentityMat = FALSE;
249     cmsPipeline* Dest, *Src;
250     cmsContext ContextID;
251     cmsUInt32Number nChans;
252     cmsFloat64Number factor = 1.0;
253 
254 
255     // Apply only to floating-point cases
256     if (!T_FLOAT(*InputFormat) || !T_FLOAT(*OutputFormat)) return FALSE;
257 
258     // Only works on RGB to RGB and gray to gray
259     if ( !( (T_CHANNELS(*InputFormat) == 3 && T_CHANNELS(*OutputFormat) == 3))  &&
260          !( (T_CHANNELS(*InputFormat) == 1 && T_CHANNELS(*OutputFormat) == 1))) return FALSE;
261 
262     // Only works on float
263     if (T_BYTES(*InputFormat) != 4 || T_BYTES(*OutputFormat) != 4) return FALSE;
264 
265     // Seems suitable, proceed
266     Src = *Lut;
267 
268     // Check for shaper-matrix-matrix-shaper structure, that is what this optimizer stands for
269     if (!cmsPipelineCheckAndRetreiveStages(Src, 4,
270         cmsSigCurveSetElemType, cmsSigMatrixElemType, cmsSigMatrixElemType, cmsSigCurveSetElemType,
271         &Curve1, &Matrix1, &Matrix2, &Curve2)) return FALSE;
272 
273     ContextID = cmsGetPipelineContextID(Src);
274     nChans    = T_CHANNELS(*InputFormat);
275 
276     // Get both matrices, which are 3x3
277     Data1 = (_cmsStageMatrixData*) cmsStageData(Matrix1);
278     Data2 = (_cmsStageMatrixData*) cmsStageData(Matrix2);
279 
280     // Input offset should be zero
281     if (Data1 ->Offset != NULL) return FALSE;
282 
283     if (cmsStageInputChannels(Matrix1) == 1 && cmsStageOutputChannels(Matrix2) == 1)
284     {
285         // This is a gray to gray. Just multiply
286          factor = Data1->Double[0]*Data2->Double[0] +
287                   Data1->Double[1]*Data2->Double[1] +
288                   Data1->Double[2]*Data2->Double[2];
289 
290         if (fabs(1 - factor) < (1.0 / 65535.0)) IdentityMat = TRUE;
291     }
292     else
293     {
294         // Multiply both matrices to get the result
295         _cmsMAT3per(&res, (cmsMAT3*) Data2 ->Double, (cmsMAT3*) Data1 ->Double);
296 
297         // Now the result is in res + Data2 -> Offset. Maybe is a plain identity?
298         IdentityMat = FALSE;
299         if (_cmsMAT3isIdentity(&res) && Data2 ->Offset == NULL) {
300 
301             // We can get rid of full matrix
302             IdentityMat = TRUE;
303         }
304     }
305 
306       // Allocate an empty LUT
307     Dest =  cmsPipelineAlloc(ContextID, nChans, nChans);
308     if (!Dest) return FALSE;
309 
310     // Assamble the new LUT
311     cmsPipelineInsertStage(Dest, cmsAT_BEGIN, cmsStageDup(Curve1));
312 
313     if (!IdentityMat) {
314 
315         if (nChans == 1)
316              cmsPipelineInsertStage(Dest, cmsAT_END,
317                     cmsStageAllocMatrix(ContextID, 1, 1, (const cmsFloat64Number*) &factor, Data2->Offset));
318         else
319             cmsPipelineInsertStage(Dest, cmsAT_END,
320                     cmsStageAllocMatrix(ContextID, 3, 3, (const cmsFloat64Number*) &res, Data2 ->Offset));
321     }
322 
323 
324     cmsPipelineInsertStage(Dest, cmsAT_END, cmsStageDup(Curve2));
325 
326     // If identity on matrix, we can further optimize the curves, so call the join curves routine
327     if (IdentityMat) {
328 
329            OptimizeFloatByJoiningCurves(TransformFn, UserData, FreeUserData, &Dest, InputFormat, OutputFormat, dwFlags);
330     }
331     else {
332         _cmsStageToneCurvesData* mpeC1 = (_cmsStageToneCurvesData*) cmsStageData(Curve1);
333         _cmsStageToneCurvesData* mpeC2 = (_cmsStageToneCurvesData*) cmsStageData(Curve2);
334 
335         // In this particular optimization, cach� does not help as it takes more time to deal with
336         // the cachthat with the pixel handling
337         *dwFlags |= cmsFLAGS_NOCACHE;
338 
339         // Setup the optimizarion routines
340         *UserData = SetMatShaper(ContextID, mpeC1 ->TheCurves, &res, (cmsVEC3*) Data2 ->Offset, mpeC2->TheCurves);
341         *FreeUserData = FreeMatShaper;
342 
343         *TransformFn = MatShaperFloat;
344     }
345 
346     *dwFlags &= ~cmsFLAGS_CAN_CHANGE_FORMATTER;
347     cmsPipelineFree(Src);
348     *Lut = Dest;
349     return TRUE;
350 }
351 
352 
353