//***************************************************************************/
// This software is released under the 2-Clause BSD license, included
// below.
//
// Copyright (c) 2019, Aous Naman
// Copyright (c) 2019, Kakadu Software Pty Ltd, Australia
// Copyright (c) 2019, The University of New South Wales, Australia
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//***************************************************************************/
// This file is part of the OpenJPH software implementation.
33 // File: ojph_colour.cpp 34 // Author: Aous Naman 35 // Date: 28 August 2019 36 //***************************************************************************/ 37 38 #include <cmath> 39 40 #include "ojph_defs.h" 41 #include "ojph_arch.h" 42 #include "ojph_colour.h" 43 #include "ojph_colour_local.h" 44 45 namespace ojph { 46 namespace local { 47 48 ////////////////////////////////////////////////////////////////////////// 49 void (*cnvrt_si32_to_si32_shftd) 50 (const si32 *sp, si32 *dp, int shift, int width) 51 = gen_cnvrt_si32_to_si32_shftd; 52 53 //////////////////////////////////////////////////////////////////////////// 54 void (*cnvrt_si32_to_float_shftd) 55 (const si32 *sp, float *dp, float mul, int width) 56 = gen_cnvrt_si32_to_float_shftd; 57 58 //////////////////////////////////////////////////////////////////////////// 59 void (*cnvrt_si32_to_float) 60 (const si32 *sp, float *dp, float mul, int width) 61 = gen_cnvrt_si32_to_float; 62 63 //////////////////////////////////////////////////////////////////////////// 64 void (*cnvrt_float_to_si32_shftd) 65 (const float *sp, si32 *dp, float mul, int width) 66 = gen_cnvrt_float_to_si32_shftd; 67 68 //////////////////////////////////////////////////////////////////////////// 69 void (*cnvrt_float_to_si32) 70 (const float *sp, si32 *dp, float mul, int width) 71 = gen_cnvrt_float_to_si32; 72 73 //////////////////////////////////////////////////////////////////////////// 74 void (*rct_forward) 75 (const si32 *r, const si32 *g, const si32 *b, 76 si32 *y, si32 *cb, si32 *cr, int repeat) 77 = gen_rct_forward; 78 79 //////////////////////////////////////////////////////////////////////////// 80 void (*rct_backward) 81 (const si32 *y, const si32 *cb, const si32 *cr, 82 si32 *r, si32 *g, si32 *b, int repeat) 83 = gen_rct_backward; 84 85 //////////////////////////////////////////////////////////////////////////// 86 void (*ict_forward) 87 (const float *r, const float *g, const float *b, 88 float *y, float *cb, float *cr, 
int repeat) 89 = gen_ict_forward; 90 91 //////////////////////////////////////////////////////////////////////////// 92 void (*ict_backward) 93 (const float *y, const float *cb, const float *cr, 94 float *r, float *g, float *b, int repeat) 95 = gen_ict_backward; 96 97 //////////////////////////////////////////////////////////////////////////// 98 static bool colour_transform_functions_initialized = false; 99 100 ////////////////////////////////////////////////////////////////////////// init_colour_transform_functions()101 void init_colour_transform_functions() 102 { 103 if (colour_transform_functions_initialized) 104 return; 105 106 cnvrt_si32_to_si32_shftd = gen_cnvrt_si32_to_si32_shftd; 107 cnvrt_si32_to_float_shftd = gen_cnvrt_si32_to_float_shftd; 108 cnvrt_si32_to_float = gen_cnvrt_si32_to_float; 109 cnvrt_float_to_si32_shftd = gen_cnvrt_float_to_si32_shftd; 110 cnvrt_float_to_si32 = gen_cnvrt_float_to_si32; 111 rct_forward = gen_rct_forward; 112 rct_backward = gen_rct_backward; 113 ict_forward = gen_ict_forward; 114 ict_backward = gen_ict_backward; 115 116 #ifndef OJPH_DISABLE_INTEL_SIMD 117 int level = cpu_ext_level(); 118 119 if (level >= 2) 120 { 121 cnvrt_si32_to_float_shftd = sse_cnvrt_si32_to_float_shftd; 122 cnvrt_si32_to_float = sse_cnvrt_si32_to_float; 123 cnvrt_float_to_si32_shftd = sse_cnvrt_float_to_si32_shftd; 124 cnvrt_float_to_si32 = sse_cnvrt_float_to_si32; 125 ict_forward = sse_ict_forward; 126 ict_backward = sse_ict_backward; 127 } 128 129 if (level >= 3) 130 { 131 cnvrt_float_to_si32_shftd = sse2_cnvrt_float_to_si32_shftd; 132 cnvrt_float_to_si32 = sse2_cnvrt_float_to_si32; 133 cnvrt_si32_to_si32_shftd = sse2_cnvrt_si32_to_si32_shftd; 134 rct_forward = sse2_rct_forward; 135 rct_backward = sse2_rct_backward; 136 } 137 138 if (level >= 7) 139 { 140 cnvrt_si32_to_float_shftd = avx_cnvrt_si32_to_float_shftd; 141 cnvrt_si32_to_float = avx_cnvrt_si32_to_float; 142 cnvrt_float_to_si32_shftd = avx_cnvrt_float_to_si32_shftd; 143 cnvrt_float_to_si32 = 
avx_cnvrt_float_to_si32; 144 ict_forward = avx_ict_forward; 145 ict_backward = avx_ict_backward; 146 } 147 148 if (level >= 8) 149 { 150 cnvrt_si32_to_si32_shftd = avx2_cnvrt_si32_to_si32_shftd; 151 rct_forward = avx2_rct_forward; 152 rct_backward = avx2_rct_backward; 153 } 154 #endif 155 156 colour_transform_functions_initialized = true; 157 } 158 159 160 ////////////////////////////////////////////////////////////////////////// 161 const float CT_CNST::ALPHA_RF = 0.299f; 162 const float CT_CNST::ALPHA_GF = 0.587f; 163 const float CT_CNST::ALPHA_BF = 0.114f; 164 const float CT_CNST::BETA_CbF = float(0.5/(1-double(CT_CNST::ALPHA_BF))); 165 const float CT_CNST::BETA_CrF = float(0.5/(1-double(CT_CNST::ALPHA_RF))); 166 const float CT_CNST::GAMMA_CB2G = 167 float(2.0*double(ALPHA_BF)*(1.0-double(ALPHA_BF))/double(ALPHA_GF)); 168 const float CT_CNST::GAMMA_CR2G = 169 float(2.0*double(ALPHA_RF)*(1.0-double(ALPHA_RF))/double(ALPHA_GF)); 170 const float CT_CNST::GAMMA_CB2B = float(2.0 * (1.0 - double(ALPHA_BF))); 171 const float CT_CNST::GAMMA_CR2R = float(2.0 * (1.0 - double(ALPHA_RF))); 172 173 ////////////////////////////////////////////////////////////////////////// gen_cnvrt_si32_to_si32_shftd(const si32 * sp,si32 * dp,int shift,int width)174 void gen_cnvrt_si32_to_si32_shftd(const si32 *sp, si32 *dp, int shift, 175 int width) 176 { 177 for (int i = width; i > 0; --i) 178 *dp++ = *sp++ + shift; 179 } 180 181 ////////////////////////////////////////////////////////////////////////// gen_cnvrt_si32_to_float_shftd(const si32 * sp,float * dp,float mul,int width)182 void gen_cnvrt_si32_to_float_shftd(const si32 *sp, float *dp, float mul, 183 int width) 184 { 185 for (int i = width; i > 0; --i) 186 *dp++ = (float)*sp++ * mul - 0.5f; 187 } 188 189 ////////////////////////////////////////////////////////////////////////// gen_cnvrt_si32_to_float(const si32 * sp,float * dp,float mul,int width)190 void gen_cnvrt_si32_to_float(const si32 *sp, float *dp, float mul, 191 int width) 
192 { 193 for (int i = width; i > 0; --i) 194 *dp++ = (float)*sp++ * mul; 195 } 196 197 ////////////////////////////////////////////////////////////////////////// gen_cnvrt_float_to_si32_shftd(const float * sp,si32 * dp,float mul,int width)198 void gen_cnvrt_float_to_si32_shftd(const float *sp, si32 *dp, float mul, 199 int width) 200 { 201 for (int i = width; i > 0; --i) 202 *dp++ = ojph_round((*sp++ + 0.5f) * mul); 203 } 204 205 ////////////////////////////////////////////////////////////////////////// gen_cnvrt_float_to_si32(const float * sp,si32 * dp,float mul,int width)206 void gen_cnvrt_float_to_si32(const float *sp, si32 *dp, float mul, 207 int width) 208 { 209 for (int i = width; i > 0; --i) 210 *dp++ = ojph_round(*sp++ * mul); 211 } 212 213 ////////////////////////////////////////////////////////////////////////// gen_rct_forward(const si32 * r,const si32 * g,const si32 * b,si32 * y,si32 * cb,si32 * cr,int repeat)214 void gen_rct_forward(const si32 *r, const si32 *g, const si32 *b, 215 si32 *y, si32 *cb, si32 *cr, int repeat) 216 { 217 for (int i = repeat; i > 0; --i) 218 { 219 *y++ = (*r + (*g << 1) + *b) >> 2; 220 *cb++ = (*b++ - *g); 221 *cr++ = (*r++ - *g++); 222 } 223 } 224 225 ////////////////////////////////////////////////////////////////////////// gen_rct_backward(const si32 * y,const si32 * cb,const si32 * cr,si32 * r,si32 * g,si32 * b,int repeat)226 void gen_rct_backward(const si32 *y, const si32 *cb, const si32 *cr, 227 si32 *r, si32 *g, si32 *b, int repeat) 228 { 229 for (int i = repeat; i > 0; --i) 230 { 231 *g = *y++ - ((*cb + *cr)>>2); 232 *b++ = *cb++ + *g; 233 *r++ = *cr++ + *g++; 234 } 235 } 236 237 ////////////////////////////////////////////////////////////////////////// gen_ict_forward(const float * r,const float * g,const float * b,float * y,float * cb,float * cr,int repeat)238 void gen_ict_forward(const float *r, const float *g, const float *b, 239 float *y, float *cb, float *cr, int repeat) 240 { 241 for (int i = repeat; i > 0; --i) 
242 { 243 *y = CT_CNST::ALPHA_RF * *r 244 + CT_CNST::ALPHA_GF * *g++ 245 + CT_CNST::ALPHA_BF * *b; 246 *cb++ = CT_CNST::BETA_CbF * (*b++ - *y); 247 *cr++ = CT_CNST::BETA_CrF * (*r++ - *y++); 248 } 249 } 250 251 ////////////////////////////////////////////////////////////////////////// gen_ict_backward(const float * y,const float * cb,const float * cr,float * r,float * g,float * b,int repeat)252 void gen_ict_backward(const float *y, const float *cb, const float *cr, 253 float *r, float *g, float *b, int repeat) 254 { 255 for (int i = repeat; i > 0; --i) 256 { 257 *g++ = *y - CT_CNST::GAMMA_CR2G * *cr - CT_CNST::GAMMA_CB2G * *cb; 258 *r++ = *y + CT_CNST::GAMMA_CR2R * *cr++; 259 *b++ = *y++ + CT_CNST::GAMMA_CB2B * *cb++; 260 } 261 } 262 263 } 264 } 265