1 /*
2  * Copyright (c) 2006 Cyrille Berger  <cberger@cberger.net>
3  * Copyright (c) 2011 Silvio Heinrich <plassy@web.de>
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Library General Public
7  * License as published by the Free Software Foundation; either
8  * version 2 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * Library General Public License for more details.
14  *
15  * You should have received a copy of the GNU Library General Public License
16  * along with this library; see the file COPYING.LIB.  If not, write to
17  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18  * Boston, MA 02110-1301, USA.
19  */
20 
21 #ifndef KOOPTIMIZEDCOMPOSITEOPOVER32_H_
22 #define KOOPTIMIZEDCOMPOSITEOPOVER32_H_
23 
24 #include "KoCompositeOpBase.h"
25 #include "KoCompositeOpRegistry.h"
26 #include "KoStreamedMath.h"
27 
28 
29 template<Vc::Implementation _impl>
30 struct OptiDiv {
divScalarOptiDiv31     static ALWAYS_INLINE float divScalar(const float& divident, const float& divisor) {
32 #ifdef __SSE__
33         float result;
34 
35         __m128 x = _mm_set_ss(divisor);
36         __m128 y = _mm_set_ss(divident);
37         x = _mm_rcp_ss(x);
38         x = _mm_mul_ss(x, y);
39 
40 
41         _mm_store_ss(&result, x);
42         return result;
43 #else
44         return divident / divisor;
45 #endif
46 
47     }
48 
divVectorOptiDiv49     static ALWAYS_INLINE Vc::float_v divVector(Vc::float_v::AsArg divident, Vc::float_v::AsArg  divisor) {
50 #ifdef __SSE__
51         return divident * Vc::reciprocal(divisor);
52 #else
53         return divident / divisor;
54 #endif
55 
56     }
57 
58 };
59 
60 
61 template<typename channels_type, typename pixel_type, bool alphaLocked, bool allChannelsFlag>
62 struct OverCompositor32 {
63     struct ParamsWrapper {
ParamsWrapperOverCompositor32::ParamsWrapper64         ParamsWrapper(const KoCompositeOp::ParameterInfo& params)
65             : channelFlags(params.channelFlags)
66         {
67         }
68         const QBitArray &channelFlags;
69     };
70 
71     // \see docs in AlphaDarkenCompositor32
72     template<bool haveMask, bool src_aligned, Vc::Implementation _impl>
compositeVectorOverCompositor3273     static ALWAYS_INLINE void compositeVector(const quint8 *src, quint8 *dst, const quint8 *mask, float opacity, const ParamsWrapper &oparams)
74     {
75         Q_UNUSED(oparams);
76 
77         Vc::float_v src_alpha;
78         Vc::float_v dst_alpha;
79 
80         src_alpha = KoStreamedMath<_impl>::template fetch_alpha_32<src_aligned>(src);
81 
82         bool haveOpacity = opacity != 1.0;
83         Vc::float_v opacity_norm_vec(opacity);
84 
85         Vc::float_v uint8Max((float)255.0);
86         Vc::float_v uint8MaxRec1((float)1.0 / 255.0);
87         Vc::float_v zeroValue(Vc::Zero);
88         Vc::float_v oneValue(Vc::One);
89 
90         src_alpha *= opacity_norm_vec;
91 
92         if (haveMask) {
93             Vc::float_v mask_vec = KoStreamedMath<_impl>::fetch_mask_8(mask);
94             src_alpha *= mask_vec * uint8MaxRec1;
95         }
96 
97         // The source cannot change the colors in the destination,
98         // since its fully transparent
99         if ((src_alpha == zeroValue).isFull()) {
100             return;
101         }
102 
103         dst_alpha = KoStreamedMath<_impl>::template fetch_alpha_32<true>(dst);
104 
105         Vc::float_v src_c1;
106         Vc::float_v src_c2;
107         Vc::float_v src_c3;
108 
109         Vc::float_v dst_c1;
110         Vc::float_v dst_c2;
111         Vc::float_v dst_c3;
112 
113 
114         KoStreamedMath<_impl>::template fetch_colors_32<src_aligned>(src, src_c1, src_c2, src_c3);
115         Vc::float_v src_blend;
116         Vc::float_v new_alpha;
117 
118         if ((dst_alpha == uint8Max).isFull()) {
119             new_alpha = dst_alpha;
120             src_blend = src_alpha * uint8MaxRec1;
121         } else if ((dst_alpha == zeroValue).isFull()) {
122             new_alpha = src_alpha;
123             src_blend = oneValue;
124         } else {
125             /**
126              * The value of new_alpha can have *some* zero values,
127              * which will result in NaN values while division. But
128              * when converted to integers these NaN values will
129              * be converted to zeroes, which is exactly what we need
130              */
131             new_alpha = dst_alpha + (uint8Max - dst_alpha) * src_alpha * uint8MaxRec1;
132 
133             // Optimized version of:
134             //     src_blend = src_alpha / new_alpha;
135             src_blend = OptiDiv<_impl>::divVector(src_alpha, new_alpha);
136 
137         }
138 
139         if (!(src_blend == oneValue).isFull()) {
140             KoStreamedMath<_impl>::template fetch_colors_32<true>(dst, dst_c1, dst_c2, dst_c3);
141 
142             dst_c1 = src_blend * (src_c1 - dst_c1) + dst_c1;
143             dst_c2 = src_blend * (src_c2 - dst_c2) + dst_c2;
144             dst_c3 = src_blend * (src_c3 - dst_c3) + dst_c3;
145 
146         } else {
147             if (!haveMask && !haveOpacity) {
148                 memcpy(dst, src, 4 * Vc::float_v::size());
149                 return;
150             } else {
151                 // opacity has changed the alpha of the source,
152                 // so we can't just memcpy the bytes
153                 dst_c1 = src_c1;
154                 dst_c2 = src_c2;
155                 dst_c3 = src_c3;
156             }
157         }
158 
159         KoStreamedMath<_impl>::write_channels_32(dst, new_alpha, dst_c1, dst_c2, dst_c3);
160     }
161 
162     template <bool haveMask, Vc::Implementation _impl>
compositeOnePixelScalarOverCompositor32163     static ALWAYS_INLINE void compositeOnePixelScalar(const channels_type *src, channels_type *dst, const quint8 *mask, float opacity, const ParamsWrapper &oparams)
164     {
165         using namespace Arithmetic;
166         const qint32 alpha_pos = 3;
167 
168         const float uint8Rec1 = 1.0 / 255.0;
169         const float uint8Max = 255.0;
170 
171         float srcAlpha = src[alpha_pos];
172         srcAlpha *= opacity;
173 
174         if (haveMask) {
175             srcAlpha *= float(*mask) * uint8Rec1;
176         }
177 
178         if (srcAlpha != 0.0) {
179 
180             float dstAlpha = dst[alpha_pos];
181             float srcBlendNorm;
182 
183             if (alphaLocked || dstAlpha == uint8Max) {
184                 srcBlendNorm = srcAlpha * uint8Rec1;
185             } else if (dstAlpha == 0.0) {
186                 dstAlpha = srcAlpha;
187                 srcBlendNorm = 1.0;
188 
189                 if (!allChannelsFlag) {
190                     pixel_type *d = reinterpret_cast<pixel_type*>(dst);
191                     *d = 0; // dstAlpha is already null
192                 }
193             } else {
194                 dstAlpha += (uint8Max - dstAlpha) * srcAlpha * uint8Rec1;
195                 // Optimized version of:
196                 //     srcBlendNorm = srcAlpha / dstAlpha);
197                 srcBlendNorm = OptiDiv<_impl>::divScalar(srcAlpha, dstAlpha);
198 
199             }
200 
201             if(allChannelsFlag) {
202                 if (srcBlendNorm == 1.0) {
203                     if (!alphaLocked) {
204                         const pixel_type *s = reinterpret_cast<const pixel_type*>(src);
205                         pixel_type *d = reinterpret_cast<pixel_type*>(dst);
206                         *d = *s;
207                     } else {
208                         dst[0] = src[0];
209                         dst[1] = src[1];
210                         dst[2] = src[2];
211                     }
212                 } else if (srcBlendNorm != 0.0){
213                     dst[0] = KoStreamedMath<_impl>::lerp_mixed_u8_float(dst[0], src[0], srcBlendNorm);
214                     dst[1] = KoStreamedMath<_impl>::lerp_mixed_u8_float(dst[1], src[1], srcBlendNorm);
215                     dst[2] = KoStreamedMath<_impl>::lerp_mixed_u8_float(dst[2], src[2], srcBlendNorm);
216                 }
217             } else {
218                 const QBitArray &channelFlags = oparams.channelFlags;
219 
220                 if (srcBlendNorm == 1.0) {
221                     if(channelFlags.at(0)) dst[0] = src[0];
222                     if(channelFlags.at(1)) dst[1] = src[1];
223                     if(channelFlags.at(2)) dst[2] = src[2];
224                 } else if (srcBlendNorm != 0.0) {
225                     if(channelFlags.at(0)) dst[0] = KoStreamedMath<_impl>::lerp_mixed_u8_float(dst[0], src[0], srcBlendNorm);
226                     if(channelFlags.at(1)) dst[1] = KoStreamedMath<_impl>::lerp_mixed_u8_float(dst[1], src[1], srcBlendNorm);
227                     if(channelFlags.at(2)) dst[2] = KoStreamedMath<_impl>::lerp_mixed_u8_float(dst[2], src[2], srcBlendNorm);
228                 }
229             }
230 
231             if (!alphaLocked) {
232                 dst[alpha_pos] = KoStreamedMath<_impl>::round_float_to_uint(dstAlpha);
233             }
234         }
235     }
236 };
237 
238 /**
239  * An optimized version of a composite op for the use in 4 byte
240  * colorspaces with alpha channel placed at the last byte of
241  * the pixel: C1_C2_C3_A.
242  */
243 template<Vc::Implementation _impl>
244 class KoOptimizedCompositeOpOver32 : public KoCompositeOp
245 {
246 public:
KoOptimizedCompositeOpOver32(const KoColorSpace * cs)247     KoOptimizedCompositeOpOver32(const KoColorSpace* cs)
248         : KoCompositeOp(cs, COMPOSITE_OVER, i18n("Normal"), KoCompositeOp::categoryMix()) {}
249 
250     using KoCompositeOp::composite;
251 
composite(const KoCompositeOp::ParameterInfo & params)252     virtual void composite(const KoCompositeOp::ParameterInfo& params) const
253     {
254         if(params.maskRowStart) {
255             composite<true>(params);
256         } else {
257             composite<false>(params);
258         }
259     }
260 
261     template <bool haveMask>
composite(const KoCompositeOp::ParameterInfo & params)262     inline void composite(const KoCompositeOp::ParameterInfo& params) const {
263         if (params.channelFlags.isEmpty() ||
264             params.channelFlags == QBitArray(4, true)) {
265 
266             KoStreamedMath<_impl>::template genericComposite32<haveMask, false, OverCompositor32<quint8, quint32, false, true> >(params);
267         } else {
268             const bool allChannelsFlag =
269                 params.channelFlags.at(0) &&
270                 params.channelFlags.at(1) &&
271                 params.channelFlags.at(2);
272 
273             const bool alphaLocked =
274                 !params.channelFlags.at(3);
275 
276             if (allChannelsFlag && alphaLocked) {
277                 KoStreamedMath<_impl>::template genericComposite32_novector<haveMask, false, OverCompositor32<quint8, quint32, true, true> >(params);
278             } else if (!allChannelsFlag && !alphaLocked) {
279                 KoStreamedMath<_impl>::template genericComposite32_novector<haveMask, false, OverCompositor32<quint8, quint32, false, false> >(params);
280             } else /*if (!allChannelsFlag && alphaLocked) */{
281                 KoStreamedMath<_impl>::template genericComposite32_novector<haveMask, false, OverCompositor32<quint8, quint32, true, false> >(params);
282             }
283         }
284     }
285 };
286 
287 #endif // KOOPTIMIZEDCOMPOSITEOPOVER32_H_
288