1 //***************************************************************************/
2 // This software is released under the 2-Clause BSD license, included
3 // below.
4 //
5 // Copyright (c) 2019, Aous Naman
6 // Copyright (c) 2019, Kakadu Software Pty Ltd, Australia
7 // Copyright (c) 2019, The University of New South Wales, Australia
8 //
9 // Redistribution and use in source and binary forms, with or without
10 // modification, are permitted provided that the following conditions are
11 // met:
12 //
13 // 1. Redistributions of source code must retain the above copyright
14 // notice, this list of conditions and the following disclaimer.
15 //
16 // 2. Redistributions in binary form must reproduce the above copyright
17 // notice, this list of conditions and the following disclaimer in the
18 // documentation and/or other materials provided with the distribution.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
21 // IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 // TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
23 // PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
26 // TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 //***************************************************************************/
32 // This file is part of the OpenJPH software implementation.
33 // File: ojph_colour.cpp
34 // Author: Aous Naman
35 // Date: 28 August 2019
36 //***************************************************************************/
37 
38 #include <cmath>
39 
40 #include "ojph_defs.h"
41 #include "ojph_arch.h"
42 #include "ojph_colour.h"
43 #include "ojph_colour_local.h"
44 
45 namespace ojph {
46   namespace local {
47 
48     //////////////////////////////////////////////////////////////////////////
49     void (*cnvrt_si32_to_si32_shftd)
50       (const si32 *sp, si32 *dp, int shift, int width)
51       = gen_cnvrt_si32_to_si32_shftd;
52 
53     ////////////////////////////////////////////////////////////////////////////
54     void (*cnvrt_si32_to_float_shftd)
55       (const si32 *sp, float *dp, float mul, int width)
56       = gen_cnvrt_si32_to_float_shftd;
57 
58     ////////////////////////////////////////////////////////////////////////////
59     void (*cnvrt_si32_to_float)
60       (const si32 *sp, float *dp, float mul, int width)
61       = gen_cnvrt_si32_to_float;
62 
63     ////////////////////////////////////////////////////////////////////////////
64     void (*cnvrt_float_to_si32_shftd)
65       (const float *sp, si32 *dp, float mul, int width)
66       = gen_cnvrt_float_to_si32_shftd;
67 
68     ////////////////////////////////////////////////////////////////////////////
69     void (*cnvrt_float_to_si32)
70       (const float *sp, si32 *dp, float mul, int width)
71       = gen_cnvrt_float_to_si32;
72 
73     ////////////////////////////////////////////////////////////////////////////
74     void (*rct_forward)
75       (const si32 *r, const si32 *g, const si32 *b,
76        si32 *y, si32 *cb, si32 *cr, int repeat)
77       = gen_rct_forward;
78 
79     ////////////////////////////////////////////////////////////////////////////
80     void (*rct_backward)
81       (const si32 *y, const si32 *cb, const si32 *cr,
82        si32 *r, si32 *g, si32 *b, int repeat)
83       = gen_rct_backward;
84 
85     ////////////////////////////////////////////////////////////////////////////
86     void (*ict_forward)
87       (const float *r, const float *g, const float *b,
88        float *y, float *cb, float *cr, int repeat)
89       = gen_ict_forward;
90 
91     ////////////////////////////////////////////////////////////////////////////
92     void (*ict_backward)
93       (const float *y, const float *cb, const float *cr,
94        float *r, float *g, float *b, int repeat)
95       = gen_ict_backward;
96 
97     ////////////////////////////////////////////////////////////////////////////
98     static bool colour_transform_functions_initialized = false;
99 
100     //////////////////////////////////////////////////////////////////////////
init_colour_transform_functions()101     void init_colour_transform_functions()
102     {
103       if (colour_transform_functions_initialized)
104         return;
105 
106       cnvrt_si32_to_si32_shftd = gen_cnvrt_si32_to_si32_shftd;
107       cnvrt_si32_to_float_shftd = gen_cnvrt_si32_to_float_shftd;
108       cnvrt_si32_to_float = gen_cnvrt_si32_to_float;
109       cnvrt_float_to_si32_shftd = gen_cnvrt_float_to_si32_shftd;
110       cnvrt_float_to_si32 = gen_cnvrt_float_to_si32;
111       rct_forward = gen_rct_forward;
112       rct_backward = gen_rct_backward;
113       ict_forward = gen_ict_forward;
114       ict_backward = gen_ict_backward;
115 
116 #ifndef OJPH_DISABLE_INTEL_SIMD
117       int level = cpu_ext_level();
118 
119       if (level >= 2)
120       {
121         cnvrt_si32_to_float_shftd = sse_cnvrt_si32_to_float_shftd;
122         cnvrt_si32_to_float = sse_cnvrt_si32_to_float;
123         cnvrt_float_to_si32_shftd = sse_cnvrt_float_to_si32_shftd;
124         cnvrt_float_to_si32 = sse_cnvrt_float_to_si32;
125         ict_forward = sse_ict_forward;
126         ict_backward = sse_ict_backward;
127       }
128 
129       if (level >= 3)
130       {
131         cnvrt_float_to_si32_shftd = sse2_cnvrt_float_to_si32_shftd;
132         cnvrt_float_to_si32 = sse2_cnvrt_float_to_si32;
133         cnvrt_si32_to_si32_shftd = sse2_cnvrt_si32_to_si32_shftd;
134         rct_forward = sse2_rct_forward;
135         rct_backward = sse2_rct_backward;
136       }
137 
138       if (level >= 7)
139       {
140         cnvrt_si32_to_float_shftd = avx_cnvrt_si32_to_float_shftd;
141         cnvrt_si32_to_float = avx_cnvrt_si32_to_float;
142         cnvrt_float_to_si32_shftd = avx_cnvrt_float_to_si32_shftd;
143         cnvrt_float_to_si32 = avx_cnvrt_float_to_si32;
144         ict_forward = avx_ict_forward;
145         ict_backward = avx_ict_backward;
146       }
147 
148       if (level >= 8)
149       {
150         cnvrt_si32_to_si32_shftd = avx2_cnvrt_si32_to_si32_shftd;
151         rct_forward = avx2_rct_forward;
152         rct_backward = avx2_rct_backward;
153       }
154 #endif
155 
156       colour_transform_functions_initialized = true;
157     }
158 
159 
160     //////////////////////////////////////////////////////////////////////////
161     const float CT_CNST::ALPHA_RF = 0.299f;
162     const float CT_CNST::ALPHA_GF = 0.587f;
163     const float CT_CNST::ALPHA_BF = 0.114f;
164     const float CT_CNST::BETA_CbF = float(0.5/(1-double(CT_CNST::ALPHA_BF)));
165     const float CT_CNST::BETA_CrF = float(0.5/(1-double(CT_CNST::ALPHA_RF)));
166     const float CT_CNST::GAMMA_CB2G =
167       float(2.0*double(ALPHA_BF)*(1.0-double(ALPHA_BF))/double(ALPHA_GF));
168     const float CT_CNST::GAMMA_CR2G =
169       float(2.0*double(ALPHA_RF)*(1.0-double(ALPHA_RF))/double(ALPHA_GF));
170     const float CT_CNST::GAMMA_CB2B = float(2.0 * (1.0 - double(ALPHA_BF)));
171     const float CT_CNST::GAMMA_CR2R = float(2.0 * (1.0 - double(ALPHA_RF)));
172 
173     //////////////////////////////////////////////////////////////////////////
gen_cnvrt_si32_to_si32_shftd(const si32 * sp,si32 * dp,int shift,int width)174     void gen_cnvrt_si32_to_si32_shftd(const si32 *sp, si32 *dp, int shift,
175                                       int width)
176     {
177       for (int i = width; i > 0; --i)
178         *dp++ = *sp++ + shift;
179     }
180 
181     //////////////////////////////////////////////////////////////////////////
gen_cnvrt_si32_to_float_shftd(const si32 * sp,float * dp,float mul,int width)182     void gen_cnvrt_si32_to_float_shftd(const si32 *sp, float *dp, float mul,
183                                        int width)
184     {
185       for (int i = width; i > 0; --i)
186         *dp++ = (float)*sp++ * mul - 0.5f;
187     }
188 
189     //////////////////////////////////////////////////////////////////////////
gen_cnvrt_si32_to_float(const si32 * sp,float * dp,float mul,int width)190     void gen_cnvrt_si32_to_float(const si32 *sp, float *dp, float mul,
191                                  int width)
192     {
193       for (int i = width; i > 0; --i)
194         *dp++ = (float)*sp++ * mul;
195     }
196 
197     //////////////////////////////////////////////////////////////////////////
gen_cnvrt_float_to_si32_shftd(const float * sp,si32 * dp,float mul,int width)198     void gen_cnvrt_float_to_si32_shftd(const float *sp, si32 *dp, float mul,
199                                        int width)
200     {
201       for (int i = width; i > 0; --i)
202         *dp++ = ojph_round((*sp++ + 0.5f) * mul);
203     }
204 
205     //////////////////////////////////////////////////////////////////////////
gen_cnvrt_float_to_si32(const float * sp,si32 * dp,float mul,int width)206     void gen_cnvrt_float_to_si32(const float *sp, si32 *dp, float mul,
207                                  int width)
208     {
209       for (int i = width; i > 0; --i)
210         *dp++ = ojph_round(*sp++ * mul);
211     }
212 
213     //////////////////////////////////////////////////////////////////////////
gen_rct_forward(const si32 * r,const si32 * g,const si32 * b,si32 * y,si32 * cb,si32 * cr,int repeat)214     void gen_rct_forward(const si32 *r, const si32 *g, const si32 *b,
215                          si32 *y, si32 *cb, si32 *cr, int repeat)
216     {
217       for (int i = repeat; i > 0; --i)
218       {
219         *y++ = (*r + (*g << 1) + *b) >> 2;
220         *cb++ = (*b++ - *g);
221         *cr++ = (*r++ - *g++);
222       }
223     }
224 
225     //////////////////////////////////////////////////////////////////////////
gen_rct_backward(const si32 * y,const si32 * cb,const si32 * cr,si32 * r,si32 * g,si32 * b,int repeat)226     void gen_rct_backward(const si32 *y, const si32 *cb, const si32 *cr,
227                           si32 *r, si32 *g, si32 *b, int repeat)
228     {
229       for (int i = repeat; i > 0; --i)
230       {
231         *g = *y++ - ((*cb + *cr)>>2);
232         *b++ = *cb++ + *g;
233         *r++ = *cr++ + *g++;
234       }
235     }
236 
237     //////////////////////////////////////////////////////////////////////////
gen_ict_forward(const float * r,const float * g,const float * b,float * y,float * cb,float * cr,int repeat)238     void gen_ict_forward(const float *r, const float *g, const float *b,
239                          float *y, float *cb, float *cr, int repeat)
240     {
241       for (int i = repeat; i > 0; --i)
242       {
243         *y = CT_CNST::ALPHA_RF * *r
244            + CT_CNST::ALPHA_GF * *g++
245            + CT_CNST::ALPHA_BF * *b;
246         *cb++ = CT_CNST::BETA_CbF * (*b++ - *y);
247         *cr++ = CT_CNST::BETA_CrF * (*r++ - *y++);
248       }
249     }
250 
251     //////////////////////////////////////////////////////////////////////////
gen_ict_backward(const float * y,const float * cb,const float * cr,float * r,float * g,float * b,int repeat)252     void gen_ict_backward(const float *y, const float *cb, const float *cr,
253                           float *r, float *g, float *b, int repeat)
254     {
255       for (int i = repeat; i > 0; --i)
256       {
257         *g++ = *y - CT_CNST::GAMMA_CR2G * *cr - CT_CNST::GAMMA_CB2G * *cb;
258         *r++ = *y + CT_CNST::GAMMA_CR2R * *cr++;
259         *b++ = *y++ + CT_CNST::GAMMA_CB2B * *cb++;
260       }
261     }
262 
263   }
264 }
265