1 //---------------------------------------------------------------------------------
2 //
3 // Little Color Management System, fast floating point extensions
4 // Copyright (c) 1998-2020 Marti Maria Saguer, all rights reserved
5 //
6 //
7 // This program is free software: you can redistribute it and/or modify
8 // it under the terms of the GNU General Public License as published by
9 // the Free Software Foundation, either version 3 of the License, or
10 // (at your option) any later version.
11 //
12 // This program is distributed in the hope that it will be useful,
13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 // GNU General Public License for more details.
16 //
17 // You should have received a copy of the GNU General Public License
18 // along with this program. If not, see <http://www.gnu.org/licenses/>.
19 //
20 //---------------------------------------------------------------------------------
21
22 // Optimization for matrix-shaper in float
23
24 #include "fast_float_internal.h"
25
26
// This is the private data container used by this optimization. One instance is
// built by SetMatShaper(), handed to the transform as its user data, read by
// MatShaperFloat() for every pixel, and released by FreeMatShaper().
typedef struct {


    // 3x3 matrix, copied from the cmsMAT3 given to SetMatShaper() and narrowed to float
    cmsFloat32Number Mat[3][3];

    // Offset added to the matrix result; all zeros when SetMatShaper() receives no offset
    cmsFloat32Number Off[3];

    // First-stage tables, one per input component, applied before the matrix
    cmsFloat32Number Shaper1R[MAX_NODES_IN_CURVE];
    cmsFloat32Number Shaper1G[MAX_NODES_IN_CURVE];
    cmsFloat32Number Shaper1B[MAX_NODES_IN_CURVE];

    // Second-stage tables, one per output component, applied after the matrix
    cmsFloat32Number Shaper2R[MAX_NODES_IN_CURVE];
    cmsFloat32Number Shaper2G[MAX_NODES_IN_CURVE];
    cmsFloat32Number Shaper2B[MAX_NODES_IN_CURVE];

    // TRUE when Off[] has to be added to the matrix result
    cmsBool UseOff;

    // Pointer actually returned by the allocator. It differs from the address of this
    // structure because malloc_aligned() adjusts the address, so this is the pointer
    // that must be given back to _cmsFree().
    void * real_ptr;

} VXMatShaperFloatData;
47
48
49 static
malloc_aligned(cmsContext ContextID)50 VXMatShaperFloatData* malloc_aligned(cmsContext ContextID)
51 {
52 cmsUInt8Number* real_ptr = (cmsUInt8Number*) _cmsMallocZero(ContextID, sizeof(VXMatShaperFloatData) + 32);
53 cmsUInt8Number* aligned = (cmsUInt8Number*) (((uintptr_t)real_ptr + 16) & ~0xf);
54 VXMatShaperFloatData* p = (VXMatShaperFloatData*) aligned;
55
56 p ->real_ptr = real_ptr;
57 return p;
58 }
59
60
61
62 // Free the private data container
63 static
FreeMatShaper(cmsContext ContextID,void * Data)64 void FreeMatShaper(cmsContext ContextID, void* Data)
65 {
66 VXMatShaperFloatData* d = (VXMatShaperFloatData*)Data;
67
68 if (d != NULL)
69 _cmsFree(ContextID, d->real_ptr);
70 }
71
72
73 static
FillShaper(cmsFloat32Number * Table,cmsToneCurve * Curve)74 void FillShaper(cmsFloat32Number* Table, cmsToneCurve* Curve)
75 {
76 int i;
77 cmsFloat32Number R;
78
79 for (i = 0; i < MAX_NODES_IN_CURVE; i++) {
80
81 R = (cmsFloat32Number) i / (cmsFloat32Number) (MAX_NODES_IN_CURVE - 1);
82
83 Table[i] = cmsEvalToneCurveFloat(Curve, R);
84 }
85 }
86
87
88 // Compute the matrix-shaper structure
89 static
SetMatShaper(cmsContext ContextID,cmsToneCurve * Curve1[3],cmsMAT3 * Mat,cmsVEC3 * Off,cmsToneCurve * Curve2[3])90 VXMatShaperFloatData* SetMatShaper(cmsContext ContextID, cmsToneCurve* Curve1[3], cmsMAT3* Mat, cmsVEC3* Off, cmsToneCurve* Curve2[3])
91 {
92 VXMatShaperFloatData* p;
93 int i, j;
94
95 // Allocate a big chuck of memory to store precomputed tables
96 p = malloc_aligned(ContextID);
97 if (p == NULL) return FALSE;
98
99
100 // Precompute tables
101 FillShaper(p->Shaper1R, Curve1[0]);
102 FillShaper(p->Shaper1G, Curve1[1]);
103 FillShaper(p->Shaper1B, Curve1[2]);
104
105 FillShaper(p->Shaper2R, Curve2[0]);
106 FillShaper(p->Shaper2G, Curve2[1]);
107 FillShaper(p->Shaper2B, Curve2[2]);
108
109
110 for (i=0; i < 3; i++) {
111 for (j=0; j < 3; j++) {
112 p->Mat[i][j] = (cmsFloat32Number) Mat->v[i].n[j];
113 }
114 }
115
116
117 for (i = 0; i < 3; i++) {
118
119 if (Off == NULL) {
120
121 p->UseOff = FALSE;
122 p->Off[i] = 0.0;
123 }
124 else {
125 p->UseOff = TRUE;
126 p->Off[i] = (cmsFloat32Number)Off->n[i];
127
128 }
129 }
130
131
132 return p;
133 }
134
135
136
137 // A fast matrix-shaper evaluator for floating point
138 static
MatShaperFloat(struct _cmstransform_struct * CMMcargo,const void * Input,void * Output,cmsUInt32Number PixelsPerLine,cmsUInt32Number LineCount,const cmsStride * Stride)139 void MatShaperFloat(struct _cmstransform_struct* CMMcargo,
140 const void* Input,
141 void* Output,
142 cmsUInt32Number PixelsPerLine,
143 cmsUInt32Number LineCount,
144 const cmsStride* Stride)
145 {
146 VXMatShaperFloatData* p = (VXMatShaperFloatData*) _cmsGetTransformUserData(CMMcargo);
147 cmsFloat32Number l1, l2, l3;
148 cmsFloat32Number r, g, b;
149 cmsUInt32Number i, ii;
150 cmsUInt32Number SourceStartingOrder[cmsMAXCHANNELS];
151 cmsUInt32Number SourceIncrements[cmsMAXCHANNELS];
152 cmsUInt32Number DestStartingOrder[cmsMAXCHANNELS];
153 cmsUInt32Number DestIncrements[cmsMAXCHANNELS];
154
155 const cmsUInt8Number* rin;
156 const cmsUInt8Number* gin;
157 const cmsUInt8Number* bin;
158 const cmsUInt8Number* ain = NULL;
159
160 cmsUInt8Number* rout;
161 cmsUInt8Number* gout;
162 cmsUInt8Number* bout;
163 cmsUInt8Number* aout = NULL;
164
165 cmsUInt32Number nchans, nalpha;
166 cmsUInt32Number strideIn, strideOut;
167
168 _cmsComputeComponentIncrements(cmsGetTransformInputFormat((cmsHTRANSFORM)CMMcargo), Stride->BytesPerPlaneIn, &nchans, &nalpha, SourceStartingOrder, SourceIncrements);
169 _cmsComputeComponentIncrements(cmsGetTransformOutputFormat((cmsHTRANSFORM)CMMcargo), Stride->BytesPerPlaneOut, &nchans, &nalpha, DestStartingOrder, DestIncrements);
170
171 if (!(_cmsGetTransformFlags((cmsHTRANSFORM)CMMcargo) & cmsFLAGS_COPY_ALPHA))
172 nalpha = 0;
173
174 strideIn = strideOut = 0;
175 for (i = 0; i < LineCount; i++) {
176
177 rin = (const cmsUInt8Number*)Input + SourceStartingOrder[0] + strideIn;
178 gin = (const cmsUInt8Number*)Input + SourceStartingOrder[1] + strideIn;
179 bin = (const cmsUInt8Number*)Input + SourceStartingOrder[2] + strideIn;
180
181 if (nalpha)
182 ain = (const cmsUInt8Number*)Input + SourceStartingOrder[3] + strideIn;
183
184 rout = (cmsUInt8Number*)Output + DestStartingOrder[0] + strideOut;
185 gout = (cmsUInt8Number*)Output + DestStartingOrder[1] + strideOut;
186 bout = (cmsUInt8Number*)Output + DestStartingOrder[2] + strideOut;
187
188 if (nalpha)
189 aout = (cmsUInt8Number*)Output + DestStartingOrder[3] + strideOut;
190
191 for (ii = 0; ii < PixelsPerLine; ii++) {
192
193 r = flerp(p->Shaper1R, *(cmsFloat32Number*)rin);
194 g = flerp(p->Shaper1G, *(cmsFloat32Number*)gin);
195 b = flerp(p->Shaper1B, *(cmsFloat32Number*)bin);
196
197 l1 = p->Mat[0][0] * r + p->Mat[0][1] * g + p->Mat[0][2] * b;
198 l2 = p->Mat[1][0] * r + p->Mat[1][1] * g + p->Mat[1][2] * b;
199 l3 = p->Mat[2][0] * r + p->Mat[2][1] * g + p->Mat[2][2] * b;
200
201 if (p->UseOff) {
202
203 l1 += p->Off[0];
204 l2 += p->Off[1];
205 l3 += p->Off[2];
206 }
207
208 *(cmsFloat32Number*)rout = flerp(p->Shaper2R, l1);
209 *(cmsFloat32Number*)gout = flerp(p->Shaper2G, l2);
210 *(cmsFloat32Number*)bout = flerp(p->Shaper2B, l3);
211
212 rin += SourceIncrements[0];
213 gin += SourceIncrements[1];
214 bin += SourceIncrements[2];
215
216 rout += DestIncrements[0];
217 gout += DestIncrements[1];
218 bout += DestIncrements[2];
219
220 if (ain)
221 {
222 *(cmsFloat32Number*)aout = *(cmsFloat32Number*)ain;
223 ain += SourceIncrements[3];
224 aout += DestIncrements[3];
225 }
226 }
227
228 strideIn += Stride->BytesPerLineIn;
229 strideOut += Stride->BytesPerLineOut;
230 }
231 }
232
233
234
OptimizeFloatMatrixShaper(_cmsTransform2Fn * TransformFn,void ** UserData,_cmsFreeUserDataFn * FreeUserData,cmsPipeline ** Lut,cmsUInt32Number * InputFormat,cmsUInt32Number * OutputFormat,cmsUInt32Number * dwFlags)235 cmsBool OptimizeFloatMatrixShaper(_cmsTransform2Fn* TransformFn,
236 void** UserData,
237 _cmsFreeUserDataFn* FreeUserData,
238 cmsPipeline** Lut,
239 cmsUInt32Number* InputFormat,
240 cmsUInt32Number* OutputFormat,
241 cmsUInt32Number* dwFlags)
242 {
243 cmsStage* Curve1, *Curve2;
244 cmsStage* Matrix1, *Matrix2;
245 _cmsStageMatrixData* Data1;
246 _cmsStageMatrixData* Data2;
247 cmsMAT3 res;
248 cmsBool IdentityMat = FALSE;
249 cmsPipeline* Dest, *Src;
250 cmsContext ContextID;
251 cmsUInt32Number nChans;
252 cmsFloat64Number factor = 1.0;
253
254
255 // Apply only to floating-point cases
256 if (!T_FLOAT(*InputFormat) || !T_FLOAT(*OutputFormat)) return FALSE;
257
258 // Only works on RGB to RGB and gray to gray
259 if ( !( (T_CHANNELS(*InputFormat) == 3 && T_CHANNELS(*OutputFormat) == 3)) &&
260 !( (T_CHANNELS(*InputFormat) == 1 && T_CHANNELS(*OutputFormat) == 1))) return FALSE;
261
262 // Only works on float
263 if (T_BYTES(*InputFormat) != 4 || T_BYTES(*OutputFormat) != 4) return FALSE;
264
265 // Seems suitable, proceed
266 Src = *Lut;
267
268 // Check for shaper-matrix-matrix-shaper structure, that is what this optimizer stands for
269 if (!cmsPipelineCheckAndRetreiveStages(Src, 4,
270 cmsSigCurveSetElemType, cmsSigMatrixElemType, cmsSigMatrixElemType, cmsSigCurveSetElemType,
271 &Curve1, &Matrix1, &Matrix2, &Curve2)) return FALSE;
272
273 ContextID = cmsGetPipelineContextID(Src);
274 nChans = T_CHANNELS(*InputFormat);
275
276 // Get both matrices, which are 3x3
277 Data1 = (_cmsStageMatrixData*) cmsStageData(Matrix1);
278 Data2 = (_cmsStageMatrixData*) cmsStageData(Matrix2);
279
280 // Input offset should be zero
281 if (Data1 ->Offset != NULL) return FALSE;
282
283 if (cmsStageInputChannels(Matrix1) == 1 && cmsStageOutputChannels(Matrix2) == 1)
284 {
285 // This is a gray to gray. Just multiply
286 factor = Data1->Double[0]*Data2->Double[0] +
287 Data1->Double[1]*Data2->Double[1] +
288 Data1->Double[2]*Data2->Double[2];
289
290 if (fabs(1 - factor) < (1.0 / 65535.0)) IdentityMat = TRUE;
291 }
292 else
293 {
294 // Multiply both matrices to get the result
295 _cmsMAT3per(&res, (cmsMAT3*) Data2 ->Double, (cmsMAT3*) Data1 ->Double);
296
297 // Now the result is in res + Data2 -> Offset. Maybe is a plain identity?
298 IdentityMat = FALSE;
299 if (_cmsMAT3isIdentity(&res) && Data2 ->Offset == NULL) {
300
301 // We can get rid of full matrix
302 IdentityMat = TRUE;
303 }
304 }
305
306 // Allocate an empty LUT
307 Dest = cmsPipelineAlloc(ContextID, nChans, nChans);
308 if (!Dest) return FALSE;
309
310 // Assamble the new LUT
311 cmsPipelineInsertStage(Dest, cmsAT_BEGIN, cmsStageDup(Curve1));
312
313 if (!IdentityMat) {
314
315 if (nChans == 1)
316 cmsPipelineInsertStage(Dest, cmsAT_END,
317 cmsStageAllocMatrix(ContextID, 1, 1, (const cmsFloat64Number*) &factor, Data2->Offset));
318 else
319 cmsPipelineInsertStage(Dest, cmsAT_END,
320 cmsStageAllocMatrix(ContextID, 3, 3, (const cmsFloat64Number*) &res, Data2 ->Offset));
321 }
322
323
324 cmsPipelineInsertStage(Dest, cmsAT_END, cmsStageDup(Curve2));
325
326 // If identity on matrix, we can further optimize the curves, so call the join curves routine
327 if (IdentityMat) {
328
329 OptimizeFloatByJoiningCurves(TransformFn, UserData, FreeUserData, &Dest, InputFormat, OutputFormat, dwFlags);
330 }
331 else {
332 _cmsStageToneCurvesData* mpeC1 = (_cmsStageToneCurvesData*) cmsStageData(Curve1);
333 _cmsStageToneCurvesData* mpeC2 = (_cmsStageToneCurvesData*) cmsStageData(Curve2);
334
335 // In this particular optimization, cach� does not help as it takes more time to deal with
336 // the cachthat with the pixel handling
337 *dwFlags |= cmsFLAGS_NOCACHE;
338
339 // Setup the optimizarion routines
340 *UserData = SetMatShaper(ContextID, mpeC1 ->TheCurves, &res, (cmsVEC3*) Data2 ->Offset, mpeC2->TheCurves);
341 *FreeUserData = FreeMatShaper;
342
343 *TransformFn = MatShaperFloat;
344 }
345
346 *dwFlags &= ~cmsFLAGS_CAN_CHANGE_FORMATTER;
347 cmsPipelineFree(Src);
348 *Lut = Dest;
349 return TRUE;
350 }
351
352
353