1 /* 2 * Copyright (c) 2006 Cyrille Berger <cberger@cberger.net> 3 * Copyright (c) 2011 Silvio Heinrich <plassy@web.de> 4 * 5 * This library is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU Library General Public 7 * License as published by the Free Software Foundation; either 8 * version 2 of the License, or (at your option) any later version. 9 * 10 * This library is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * Library General Public License for more details. 14 * 15 * You should have received a copy of the GNU Library General Public License 16 * along with this library; see the file COPYING.LIB. If not, write to 17 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 18 * Boston, MA 02110-1301, USA. 19 */ 20 21 #ifndef KOOPTIMIZEDCOMPOSITEOPOVER32_H_ 22 #define KOOPTIMIZEDCOMPOSITEOPOVER32_H_ 23 24 #include "KoCompositeOpBase.h" 25 #include "KoCompositeOpRegistry.h" 26 #include "KoStreamedMath.h" 27 28 29 template<Vc::Implementation _impl> 30 struct OptiDiv { divScalarOptiDiv31 static ALWAYS_INLINE float divScalar(const float& divident, const float& divisor) { 32 #ifdef __SSE__ 33 float result; 34 35 __m128 x = _mm_set_ss(divisor); 36 __m128 y = _mm_set_ss(divident); 37 x = _mm_rcp_ss(x); 38 x = _mm_mul_ss(x, y); 39 40 41 _mm_store_ss(&result, x); 42 return result; 43 #else 44 return divident / divisor; 45 #endif 46 47 } 48 divVectorOptiDiv49 static ALWAYS_INLINE Vc::float_v divVector(Vc::float_v::AsArg divident, Vc::float_v::AsArg divisor) { 50 #ifdef __SSE__ 51 return divident * Vc::reciprocal(divisor); 52 #else 53 return divident / divisor; 54 #endif 55 56 } 57 58 }; 59 60 61 template<typename channels_type, typename pixel_type, bool alphaLocked, bool allChannelsFlag> 62 struct OverCompositor32 { 63 struct ParamsWrapper { ParamsWrapperOverCompositor32::ParamsWrapper64 ParamsWrapper(const KoCompositeOp::ParameterInfo& params) 65 : channelFlags(params.channelFlags) 66 { 67 } 68 const QBitArray &channelFlags; 69 }; 70 71 // \see docs in AlphaDarkenCompositor32 72 template<bool haveMask, bool src_aligned, Vc::Implementation _impl> compositeVectorOverCompositor3273 static ALWAYS_INLINE void compositeVector(const quint8 *src, quint8 *dst, const quint8 *mask, float opacity, const ParamsWrapper &oparams) 74 { 75 Q_UNUSED(oparams); 76 77 Vc::float_v src_alpha; 78 Vc::float_v dst_alpha; 79 80 src_alpha = KoStreamedMath<_impl>::template fetch_alpha_32<src_aligned>(src); 81 82 bool haveOpacity = opacity != 1.0; 83 Vc::float_v opacity_norm_vec(opacity); 84 85 Vc::float_v uint8Max((float)255.0); 86 Vc::float_v uint8MaxRec1((float)1.0 / 255.0); 87 Vc::float_v zeroValue(Vc::Zero); 88 Vc::float_v oneValue(Vc::One); 89 90 src_alpha *= opacity_norm_vec; 91 92 if (haveMask) { 93 Vc::float_v mask_vec = KoStreamedMath<_impl>::fetch_mask_8(mask); 94 src_alpha *= mask_vec * uint8MaxRec1; 95 } 96 97 // The source cannot change the colors in the destination, 98 // since its fully transparent 99 if ((src_alpha == zeroValue).isFull()) { 100 return; 101 } 102 103 dst_alpha = KoStreamedMath<_impl>::template fetch_alpha_32<true>(dst); 104 105 Vc::float_v src_c1; 106 Vc::float_v src_c2; 107 Vc::float_v src_c3; 108 109 Vc::float_v dst_c1; 110 Vc::float_v dst_c2; 111 Vc::float_v dst_c3; 112 113 114 KoStreamedMath<_impl>::template fetch_colors_32<src_aligned>(src, src_c1, src_c2, src_c3); 115 Vc::float_v src_blend; 116 Vc::float_v new_alpha; 117 118 if ((dst_alpha == uint8Max).isFull()) { 119 new_alpha = dst_alpha; 120 src_blend = src_alpha * uint8MaxRec1; 121 } else if ((dst_alpha == zeroValue).isFull()) { 122 new_alpha = src_alpha; 123 src_blend = oneValue; 124 } else { 125 /** 126 * The value of new_alpha can have *some* zero values, 127 * which will result in NaN values while division. But 128 * when converted to integers these NaN values will 129 * be converted to zeroes, which is exactly what we need 130 */ 131 new_alpha = dst_alpha + (uint8Max - dst_alpha) * src_alpha * uint8MaxRec1; 132 133 // Optimized version of: 134 // src_blend = src_alpha / new_alpha; 135 src_blend = OptiDiv<_impl>::divVector(src_alpha, new_alpha); 136 137 } 138 139 if (!(src_blend == oneValue).isFull()) { 140 KoStreamedMath<_impl>::template fetch_colors_32<true>(dst, dst_c1, dst_c2, dst_c3); 141 142 dst_c1 = src_blend * (src_c1 - dst_c1) + dst_c1; 143 dst_c2 = src_blend * (src_c2 - dst_c2) + dst_c2; 144 dst_c3 = src_blend * (src_c3 - dst_c3) + dst_c3; 145 146 } else { 147 if (!haveMask && !haveOpacity) { 148 memcpy(dst, src, 4 * Vc::float_v::size()); 149 return; 150 } else { 151 // opacity has changed the alpha of the source, 152 // so we can't just memcpy the bytes 153 dst_c1 = src_c1; 154 dst_c2 = src_c2; 155 dst_c3 = src_c3; 156 } 157 } 158 159 KoStreamedMath<_impl>::write_channels_32(dst, new_alpha, dst_c1, dst_c2, dst_c3); 160 } 161 162 template <bool haveMask, Vc::Implementation _impl> compositeOnePixelScalarOverCompositor32163 static ALWAYS_INLINE void compositeOnePixelScalar(const channels_type *src, channels_type *dst, const quint8 *mask, float opacity, const ParamsWrapper &oparams) 164 { 165 using namespace Arithmetic; 166 const qint32 alpha_pos = 3; 167 168 const float uint8Rec1 = 1.0 / 255.0; 169 const float uint8Max = 255.0; 170 171 float srcAlpha = src[alpha_pos]; 172 srcAlpha *= opacity; 173 174 if (haveMask) { 175 srcAlpha *= float(*mask) * uint8Rec1; 176 } 177 178 if (srcAlpha != 0.0) { 179 180 float dstAlpha = dst[alpha_pos]; 181 float srcBlendNorm; 182 183 if (alphaLocked || dstAlpha == uint8Max) { 184 srcBlendNorm = srcAlpha * uint8Rec1; 185 } else if (dstAlpha == 0.0) { 186 dstAlpha = srcAlpha; 187 srcBlendNorm = 1.0; 188 189 if (!allChannelsFlag) { 190 pixel_type *d = reinterpret_cast<pixel_type*>(dst); 191 *d = 0; // dstAlpha is already null 192 } 193 } else { 194 dstAlpha += (uint8Max - dstAlpha) * srcAlpha * uint8Rec1; 195 // Optimized version of: 196 // srcBlendNorm = srcAlpha / dstAlpha); 197 srcBlendNorm = OptiDiv<_impl>::divScalar(srcAlpha, dstAlpha); 198 199 } 200 201 if(allChannelsFlag) { 202 if (srcBlendNorm == 1.0) { 203 if (!alphaLocked) { 204 const pixel_type *s = reinterpret_cast<const pixel_type*>(src); 205 pixel_type *d = reinterpret_cast<pixel_type*>(dst); 206 *d = *s; 207 } else { 208 dst[0] = src[0]; 209 dst[1] = src[1]; 210 dst[2] = src[2]; 211 } 212 } else if (srcBlendNorm != 0.0){ 213 dst[0] = KoStreamedMath<_impl>::lerp_mixed_u8_float(dst[0], src[0], srcBlendNorm); 214 dst[1] = KoStreamedMath<_impl>::lerp_mixed_u8_float(dst[1], src[1], srcBlendNorm); 215 dst[2] = KoStreamedMath<_impl>::lerp_mixed_u8_float(dst[2], src[2], srcBlendNorm); 216 } 217 } else { 218 const QBitArray &channelFlags = oparams.channelFlags; 219 220 if (srcBlendNorm == 1.0) { 221 if(channelFlags.at(0)) dst[0] = src[0]; 222 if(channelFlags.at(1)) dst[1] = src[1]; 223 if(channelFlags.at(2)) dst[2] = src[2]; 224 } else if (srcBlendNorm != 0.0) { 225 if(channelFlags.at(0)) dst[0] = KoStreamedMath<_impl>::lerp_mixed_u8_float(dst[0], src[0], srcBlendNorm); 226 if(channelFlags.at(1)) dst[1] = KoStreamedMath<_impl>::lerp_mixed_u8_float(dst[1], src[1], srcBlendNorm); 227 if(channelFlags.at(2)) dst[2] = KoStreamedMath<_impl>::lerp_mixed_u8_float(dst[2], src[2], srcBlendNorm); 228 } 229 } 230 231 if (!alphaLocked) { 232 dst[alpha_pos] = KoStreamedMath<_impl>::round_float_to_uint(dstAlpha); 233 } 234 } 235 } 236 }; 237 238 /** 239 * An optimized version of a composite op for the use in 4 byte 240 * colorspaces with alpha channel placed at the last byte of 241 * the pixel: C1_C2_C3_A. 242 */ 243 template<Vc::Implementation _impl> 244 class KoOptimizedCompositeOpOver32 : public KoCompositeOp 245 { 246 public: KoOptimizedCompositeOpOver32(const KoColorSpace * cs)247 KoOptimizedCompositeOpOver32(const KoColorSpace* cs) 248 : KoCompositeOp(cs, COMPOSITE_OVER, i18n("Normal"), KoCompositeOp::categoryMix()) {} 249 250 using KoCompositeOp::composite; 251 composite(const KoCompositeOp::ParameterInfo & params)252 virtual void composite(const KoCompositeOp::ParameterInfo& params) const 253 { 254 if(params.maskRowStart) { 255 composite<true>(params); 256 } else { 257 composite<false>(params); 258 } 259 } 260 261 template <bool haveMask> composite(const KoCompositeOp::ParameterInfo & params)262 inline void composite(const KoCompositeOp::ParameterInfo& params) const { 263 if (params.channelFlags.isEmpty() || 264 params.channelFlags == QBitArray(4, true)) { 265 266 KoStreamedMath<_impl>::template genericComposite32<haveMask, false, OverCompositor32<quint8, quint32, false, true> >(params); 267 } else { 268 const bool allChannelsFlag = 269 params.channelFlags.at(0) && 270 params.channelFlags.at(1) && 271 params.channelFlags.at(2); 272 273 const bool alphaLocked = 274 !params.channelFlags.at(3); 275 276 if (allChannelsFlag && alphaLocked) { 277 KoStreamedMath<_impl>::template genericComposite32_novector<haveMask, false, OverCompositor32<quint8, quint32, true, true> >(params); 278 } else if (!allChannelsFlag && !alphaLocked) { 279 KoStreamedMath<_impl>::template genericComposite32_novector<haveMask, false, OverCompositor32<quint8, quint32, false, false> >(params); 280 } else /*if (!allChannelsFlag && alphaLocked) */{ 281 KoStreamedMath<_impl>::template genericComposite32_novector<haveMask, false, OverCompositor32<quint8, quint32, true, false> >(params); 282 } 283 } 284 } 285 }; 286 287 #endif // KOOPTIMIZEDCOMPOSITEOPOVER32_H_ 288