1 /****************************************************************************
2 **
3 ** Copyright (C) 2016 The Qt Company Ltd.
4 ** Contact: https://www.qt.io/licensing/
5 **
6 ** This file is part of the QtGui module of the Qt Toolkit.
7 **
8 ** $QT_BEGIN_LICENSE:LGPL$
9 ** Commercial License Usage
10 ** Licensees holding valid commercial Qt licenses may use this file in
11 ** accordance with the commercial license agreement provided with the
12 ** Software or, alternatively, in accordance with the terms contained in
13 ** a written agreement between you and The Qt Company. For licensing terms
14 ** and conditions see https://www.qt.io/terms-conditions. For further
15 ** information use the contact form at https://www.qt.io/contact-us.
16 **
17 ** GNU Lesser General Public License Usage
18 ** Alternatively, this file may be used under the terms of the GNU Lesser
19 ** General Public License version 3 as published by the Free Software
20 ** Foundation and appearing in the file LICENSE.LGPL3 included in the
21 ** packaging of this file. Please review the following information to
22 ** ensure the GNU Lesser General Public License version 3 requirements
23 ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
24 **
25 ** GNU General Public License Usage
26 ** Alternatively, this file may be used under the terms of the GNU
27 ** General Public License version 2.0 or (at your option) the GNU General
28 ** Public license version 3 or any later version approved by the KDE Free
29 ** Qt Foundation. The licenses are as published by the Free Software
30 ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
31 ** included in the packaging of this file. Please review the following
32 ** information to ensure the GNU General Public License requirements will
33 ** be met: https://www.gnu.org/licenses/gpl-2.0.html and
34 ** https://www.gnu.org/licenses/gpl-3.0.html.
35 **
36 ** $QT_END_LICENSE$
37 **
38 ****************************************************************************/
39 
40 #include <private/qdrawhelper_p.h>
41 #include <private/qdrawingprimitive_sse2_p.h>
42 #include <private/qpaintengine_raster_p.h>
43 
44 #if defined(QT_COMPILER_SUPPORTS_SSE4_1)
45 
46 QT_BEGIN_NAMESPACE
47 
48 #ifndef __AVX2__
49 template<bool RGBA>
convertARGBToARGB32PM_sse4(uint * buffer,const uint * src,int count)50 static void convertARGBToARGB32PM_sse4(uint *buffer, const uint *src, int count)
51 {
52     int i = 0;
53     const __m128i alphaMask = _mm_set1_epi32(0xff000000);
54     const __m128i rgbaMask = _mm_setr_epi8(2, 1, 0, 3, 6, 5, 4, 7, 10, 9, 8, 11, 14, 13, 12, 15);
55     const __m128i shuffleMask = _mm_setr_epi8(6, 7, 6, 7, 6, 7, 6, 7, 14, 15, 14, 15, 14, 15, 14, 15);
56     const __m128i half = _mm_set1_epi16(0x0080);
57     const __m128i zero = _mm_setzero_si128();
58 
59     for (; i < count - 3; i += 4) {
60         __m128i srcVector = _mm_loadu_si128((const __m128i *)&src[i]);
61         if (!_mm_testz_si128(srcVector, alphaMask)) {
62             if (!_mm_testc_si128(srcVector, alphaMask)) {
63                 if (RGBA)
64                     srcVector = _mm_shuffle_epi8(srcVector, rgbaMask);
65                 __m128i src1 = _mm_unpacklo_epi8(srcVector, zero);
66                 __m128i src2 = _mm_unpackhi_epi8(srcVector, zero);
67                 __m128i alpha1 = _mm_shuffle_epi8(src1, shuffleMask);
68                 __m128i alpha2 = _mm_shuffle_epi8(src2, shuffleMask);
69                 src1 = _mm_mullo_epi16(src1, alpha1);
70                 src2 = _mm_mullo_epi16(src2, alpha2);
71                 src1 = _mm_add_epi16(src1, _mm_srli_epi16(src1, 8));
72                 src2 = _mm_add_epi16(src2, _mm_srli_epi16(src2, 8));
73                 src1 = _mm_add_epi16(src1, half);
74                 src2 = _mm_add_epi16(src2, half);
75                 src1 = _mm_srli_epi16(src1, 8);
76                 src2 = _mm_srli_epi16(src2, 8);
77                 src1 = _mm_blend_epi16(src1, alpha1, 0x88);
78                 src2 = _mm_blend_epi16(src2, alpha2, 0x88);
79                 srcVector = _mm_packus_epi16(src1, src2);
80                 _mm_storeu_si128((__m128i *)&buffer[i], srcVector);
81             } else {
82                 if (RGBA)
83                     _mm_storeu_si128((__m128i *)&buffer[i], _mm_shuffle_epi8(srcVector, rgbaMask));
84                 else if (buffer != src)
85                     _mm_storeu_si128((__m128i *)&buffer[i], srcVector);
86             }
87         } else {
88             _mm_storeu_si128((__m128i *)&buffer[i], zero);
89         }
90     }
91 
92     SIMD_EPILOGUE(i, count, 3) {
93         uint v = qPremultiply(src[i]);
94         buffer[i] = RGBA ? RGBA2ARGB(v) : v;
95     }
96 }
97 
98 template<bool RGBA>
convertARGBToRGBA64PM_sse4(QRgba64 * buffer,const uint * src,int count)99 static void convertARGBToRGBA64PM_sse4(QRgba64 *buffer, const uint *src, int count)
100 {
101     int i = 0;
102     const __m128i alphaMask = _mm_set1_epi32(0xff000000);
103     const __m128i rgbaMask = _mm_setr_epi8(2, 1, 0, 3, 6, 5, 4, 7, 10, 9, 8, 11, 14, 13, 12, 15);
104     const __m128i shuffleMask = _mm_setr_epi8(6, 7, 6, 7, 6, 7, 6, 7, 14, 15, 14, 15, 14, 15, 14, 15);
105     const __m128i zero = _mm_setzero_si128();
106 
107     for (; i < count - 3; i += 4) {
108         __m128i srcVector = _mm_loadu_si128((const __m128i *)&src[i]);
109         if (!_mm_testz_si128(srcVector, alphaMask)) {
110             bool cf = _mm_testc_si128(srcVector, alphaMask);
111 
112             if (!RGBA)
113                 srcVector = _mm_shuffle_epi8(srcVector, rgbaMask);
114             const __m128i src1 = _mm_unpacklo_epi8(srcVector, srcVector);
115             const __m128i src2 = _mm_unpackhi_epi8(srcVector, srcVector);
116             if (!cf) {
117                 __m128i alpha1 = _mm_shuffle_epi8(src1, shuffleMask);
118                 __m128i alpha2 = _mm_shuffle_epi8(src2, shuffleMask);
119                 __m128i dst1 = _mm_mulhi_epu16(src1, alpha1);
120                 __m128i dst2 = _mm_mulhi_epu16(src2, alpha2);
121                 // Map 0->0xfffe to 0->0xffff
122                 dst1 = _mm_add_epi16(dst1, _mm_srli_epi16(dst1, 15));
123                 dst2 = _mm_add_epi16(dst2, _mm_srli_epi16(dst2, 15));
124                 // correct alpha value:
125                 dst1 = _mm_blend_epi16(dst1, src1, 0x88);
126                 dst2 = _mm_blend_epi16(dst2, src2, 0x88);
127                 _mm_storeu_si128((__m128i *)&buffer[i], dst1);
128                 _mm_storeu_si128((__m128i *)&buffer[i + 2], dst2);
129             } else {
130                 _mm_storeu_si128((__m128i *)&buffer[i], src1);
131                 _mm_storeu_si128((__m128i *)&buffer[i + 2], src2);
132             }
133         } else {
134             _mm_storeu_si128((__m128i *)&buffer[i], zero);
135             _mm_storeu_si128((__m128i *)&buffer[i + 2], zero);
136         }
137     }
138 
139     SIMD_EPILOGUE(i, count, 3) {
140         const uint s = RGBA ? RGBA2ARGB(src[i]) : src[i];
141         buffer[i] = QRgba64::fromArgb32(s).premultiplied();
142     }
143 }
144 #endif // __AVX2__
145 
reciprocal_mul_ps(__m128 a,float mul)146 static inline __m128 Q_DECL_VECTORCALL reciprocal_mul_ps(__m128 a, float mul)
147 {
148     __m128 ia = _mm_rcp_ps(a); // Approximate 1/a
149     // Improve precision of ia using Newton-Raphson
150     ia = _mm_sub_ps(_mm_add_ps(ia, ia), _mm_mul_ps(ia, _mm_mul_ps(ia, a)));
151     ia = _mm_mul_ps(ia, _mm_set1_ps(mul));
152     return ia;
153 }
154 
155 template<bool RGBA, bool RGBx>
convertARGBFromARGB32PM_sse4(uint * buffer,const uint * src,int count)156 static inline void convertARGBFromARGB32PM_sse4(uint *buffer, const uint *src, int count)
157 {
158     int i = 0;
159     if ((_MM_GET_EXCEPTION_MASK() & _MM_MASK_INVALID) == 0) {
160         for (; i < count; ++i) {
161             uint v = qUnpremultiply(src[i]);
162             if (RGBx)
163                 v = 0xff000000 | v;
164             if (RGBA)
165                 v = ARGB2RGBA(v);
166             buffer[i] = v;
167         }
168         return;
169     }
170     const __m128i alphaMask = _mm_set1_epi32(0xff000000);
171     const __m128i rgbaMask = _mm_setr_epi8(2, 1, 0, 3, 6, 5, 4, 7, 10, 9, 8, 11, 14, 13, 12, 15);
172     const __m128i zero = _mm_setzero_si128();
173 
174     for (; i < count - 3; i += 4) {
175         __m128i srcVector = _mm_loadu_si128((const __m128i *)&src[i]);
176         if (!_mm_testz_si128(srcVector, alphaMask)) {
177             if (!_mm_testc_si128(srcVector, alphaMask)) {
178                 __m128i srcVectorAlpha = _mm_srli_epi32(srcVector, 24);
179                 if (RGBA)
180                     srcVector = _mm_shuffle_epi8(srcVector, rgbaMask);
181                 const __m128 a = _mm_cvtepi32_ps(srcVectorAlpha);
182                 const __m128 ia = reciprocal_mul_ps(a, 255.0f);
183                 __m128i src1 = _mm_unpacklo_epi8(srcVector, zero);
184                 __m128i src3 = _mm_unpackhi_epi8(srcVector, zero);
185                 __m128i src2 = _mm_unpackhi_epi16(src1, zero);
186                 __m128i src4 = _mm_unpackhi_epi16(src3, zero);
187                 src1 = _mm_unpacklo_epi16(src1, zero);
188                 src3 = _mm_unpacklo_epi16(src3, zero);
189                 __m128 ia1 = _mm_shuffle_ps(ia, ia, _MM_SHUFFLE(0, 0, 0, 0));
190                 __m128 ia2 = _mm_shuffle_ps(ia, ia, _MM_SHUFFLE(1, 1, 1, 1));
191                 __m128 ia3 = _mm_shuffle_ps(ia, ia, _MM_SHUFFLE(2, 2, 2, 2));
192                 __m128 ia4 = _mm_shuffle_ps(ia, ia, _MM_SHUFFLE(3, 3, 3, 3));
193                 src1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtepi32_ps(src1), ia1));
194                 src2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtepi32_ps(src2), ia2));
195                 src3 = _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtepi32_ps(src3), ia3));
196                 src4 = _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtepi32_ps(src4), ia4));
197                 src1 = _mm_packus_epi32(src1, src2);
198                 src3 = _mm_packus_epi32(src3, src4);
199                 src1 = _mm_packus_epi16(src1, src3);
200                 // Handle potential alpha == 0 values:
201                 __m128i srcVectorAlphaMask = _mm_cmpeq_epi32(srcVectorAlpha, zero);
202                 src1 = _mm_andnot_si128(srcVectorAlphaMask, src1);
203                 // Fixup alpha values:
204                 if (RGBx)
205                     srcVector = _mm_or_si128(src1, alphaMask);
206                 else
207                     srcVector = _mm_blendv_epi8(src1, srcVector, alphaMask);
208                 _mm_storeu_si128((__m128i *)&buffer[i], srcVector);
209             } else {
210                 if (RGBA)
211                     _mm_storeu_si128((__m128i *)&buffer[i], _mm_shuffle_epi8(srcVector, rgbaMask));
212                 else if (buffer != src)
213                     _mm_storeu_si128((__m128i *)&buffer[i], srcVector);
214             }
215         } else {
216             if (RGBx)
217                 _mm_storeu_si128((__m128i *)&buffer[i], alphaMask);
218             else
219                 _mm_storeu_si128((__m128i *)&buffer[i], zero);
220         }
221     }
222 
223     SIMD_EPILOGUE(i, count, 3) {
224         uint v = qUnpremultiply_sse4(src[i]);
225         if (RGBx)
226             v = 0xff000000 | v;
227         if (RGBA)
228             v = ARGB2RGBA(v);
229         buffer[i] = v;
230     }
231 }
232 
233 template<bool RGBA>
convertARGBFromRGBA64PM_sse4(uint * buffer,const QRgba64 * src,int count)234 static inline void convertARGBFromRGBA64PM_sse4(uint *buffer, const QRgba64 *src, int count)
235 {
236     int i = 0;
237     if ((_MM_GET_EXCEPTION_MASK() & _MM_MASK_INVALID) == 0) {
238         for (; i < count; ++i) {
239             const QRgba64 v = src[i].unpremultiplied();
240             buffer[i] = RGBA ? toRgba8888(v) : toArgb32(v);
241         }
242         return;
243     }
244     const __m128i alphaMask = _mm_set1_epi64x(qint64(Q_UINT64_C(0xffff) << 48));
245     const __m128i alphaMask32 = _mm_set1_epi32(0xff000000);
246     const __m128i rgbaMask = _mm_setr_epi8(2, 1, 0, 3, 6, 5, 4, 7, 10, 9, 8, 11, 14, 13, 12, 15);
247     const __m128i zero = _mm_setzero_si128();
248 
249     for (; i < count - 3; i += 4) {
250         __m128i srcVector1 = _mm_loadu_si128((const __m128i *)&src[i]);
251         __m128i srcVector2 = _mm_loadu_si128((const __m128i *)&src[i + 2]);
252         bool transparent1 = _mm_testz_si128(srcVector1, alphaMask);
253         bool opaque1 = _mm_testc_si128(srcVector1, alphaMask);
254         bool transparent2 = _mm_testz_si128(srcVector2, alphaMask);
255         bool opaque2 = _mm_testc_si128(srcVector2, alphaMask);
256 
257         if (!(transparent1 && transparent2)) {
258             if (!(opaque1 && opaque2)) {
259                 __m128i srcVector1Alpha = _mm_srli_epi64(srcVector1, 48);
260                 __m128i srcVector2Alpha = _mm_srli_epi64(srcVector2, 48);
261                 __m128i srcVectorAlpha = _mm_packus_epi32(srcVector1Alpha, srcVector2Alpha);
262                 const __m128 a = _mm_cvtepi32_ps(srcVectorAlpha);
263                 // Convert srcVectorAlpha to final 8-bit alpha channel
264                 srcVectorAlpha = _mm_add_epi32(srcVectorAlpha, _mm_set1_epi32(128));
265                 srcVectorAlpha = _mm_sub_epi32(srcVectorAlpha, _mm_srli_epi32(srcVectorAlpha, 8));
266                 srcVectorAlpha = _mm_srli_epi32(srcVectorAlpha, 8);
267                 srcVectorAlpha = _mm_slli_epi32(srcVectorAlpha, 24);
268                 const __m128 ia = reciprocal_mul_ps(a, 255.0f);
269                 __m128i src1 = _mm_unpacklo_epi16(srcVector1, zero);
270                 __m128i src2 = _mm_unpackhi_epi16(srcVector1, zero);
271                 __m128i src3 = _mm_unpacklo_epi16(srcVector2, zero);
272                 __m128i src4 = _mm_unpackhi_epi16(srcVector2, zero);
273                 __m128 ia1 = _mm_shuffle_ps(ia, ia, _MM_SHUFFLE(0, 0, 0, 0));
274                 __m128 ia2 = _mm_shuffle_ps(ia, ia, _MM_SHUFFLE(1, 1, 1, 1));
275                 __m128 ia3 = _mm_shuffle_ps(ia, ia, _MM_SHUFFLE(2, 2, 2, 2));
276                 __m128 ia4 = _mm_shuffle_ps(ia, ia, _MM_SHUFFLE(3, 3, 3, 3));
277                 src1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtepi32_ps(src1), ia1));
278                 src2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtepi32_ps(src2), ia2));
279                 src3 = _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtepi32_ps(src3), ia3));
280                 src4 = _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtepi32_ps(src4), ia4));
281                 src1 = _mm_packus_epi32(src1, src2);
282                 src3 = _mm_packus_epi32(src3, src4);
283                 // Handle potential alpha == 0 values:
284                 __m128i srcVector1AlphaMask = _mm_cmpeq_epi64(srcVector1Alpha, zero);
285                 __m128i srcVector2AlphaMask = _mm_cmpeq_epi64(srcVector2Alpha, zero);
286                 src1 = _mm_andnot_si128(srcVector1AlphaMask, src1);
287                 src3 = _mm_andnot_si128(srcVector2AlphaMask, src3);
288                 src1 = _mm_packus_epi16(src1, src3);
289                 // Fixup alpha values:
290                 src1 = _mm_blendv_epi8(src1, srcVectorAlpha, alphaMask32);
291                 // Fix RGB order
292                 if (!RGBA)
293                     src1 = _mm_shuffle_epi8(src1, rgbaMask);
294                 _mm_storeu_si128((__m128i *)&buffer[i], src1);
295             } else {
296                 __m128i src1 = _mm_unpacklo_epi16(srcVector1, zero);
297                 __m128i src2 = _mm_unpackhi_epi16(srcVector1, zero);
298                 __m128i src3 = _mm_unpacklo_epi16(srcVector2, zero);
299                 __m128i src4 = _mm_unpackhi_epi16(srcVector2, zero);
300                 src1 = _mm_add_epi32(src1, _mm_set1_epi32(128));
301                 src2 = _mm_add_epi32(src2, _mm_set1_epi32(128));
302                 src3 = _mm_add_epi32(src3, _mm_set1_epi32(128));
303                 src4 = _mm_add_epi32(src4, _mm_set1_epi32(128));
304                 src1 = _mm_sub_epi32(src1, _mm_srli_epi32(src1, 8));
305                 src2 = _mm_sub_epi32(src2, _mm_srli_epi32(src2, 8));
306                 src3 = _mm_sub_epi32(src3, _mm_srli_epi32(src3, 8));
307                 src4 = _mm_sub_epi32(src4, _mm_srli_epi32(src4, 8));
308                 src1 = _mm_srli_epi32(src1, 8);
309                 src2 = _mm_srli_epi32(src2, 8);
310                 src3 = _mm_srli_epi32(src3, 8);
311                 src4 = _mm_srli_epi32(src4, 8);
312                 src1 = _mm_packus_epi32(src1, src2);
313                 src3 = _mm_packus_epi32(src3, src4);
314                 src1 = _mm_packus_epi16(src1, src3);
315                 if (!RGBA)
316                     src1 = _mm_shuffle_epi8(src1, rgbaMask);
317                 _mm_storeu_si128((__m128i *)&buffer[i], src1);
318             }
319         } else {
320             _mm_storeu_si128((__m128i *)&buffer[i], zero);
321         }
322     }
323 
324     SIMD_EPILOGUE(i, count, 3) {
325         buffer[i] = qConvertRgba64ToRgb32_sse4<RGBA ? PixelOrderRGB : PixelOrderBGR>(src[i]);
326     }
327 }
328 
329 #ifndef __AVX2__
convertARGB32ToARGB32PM_sse4(uint * buffer,int count,const QVector<QRgb> *)330 void QT_FASTCALL convertARGB32ToARGB32PM_sse4(uint *buffer, int count, const QVector<QRgb> *)
331 {
332     convertARGBToARGB32PM_sse4<false>(buffer, buffer, count);
333 }
334 
convertRGBA8888ToARGB32PM_sse4(uint * buffer,int count,const QVector<QRgb> *)335 void QT_FASTCALL convertRGBA8888ToARGB32PM_sse4(uint *buffer, int count, const QVector<QRgb> *)
336 {
337     convertARGBToARGB32PM_sse4<true>(buffer, buffer, count);
338 }
339 
convertARGB32ToRGBA64PM_sse4(QRgba64 * buffer,const uint * src,int count,const QVector<QRgb> *,QDitherInfo *)340 const QRgba64 * QT_FASTCALL convertARGB32ToRGBA64PM_sse4(QRgba64 *buffer, const uint *src, int count,
341                                                          const QVector<QRgb> *, QDitherInfo *)
342 {
343     convertARGBToRGBA64PM_sse4<false>(buffer, src, count);
344     return buffer;
345 }
346 
convertRGBA8888ToRGBA64PM_sse4(QRgba64 * buffer,const uint * src,int count,const QVector<QRgb> *,QDitherInfo *)347 const QRgba64 * QT_FASTCALL convertRGBA8888ToRGBA64PM_sse4(QRgba64 *buffer, const uint *src, int count,
348                                                            const QVector<QRgb> *, QDitherInfo *)
349 {
350     convertARGBToRGBA64PM_sse4<true>(buffer, src, count);
351     return buffer;
352 }
353 
fetchARGB32ToARGB32PM_sse4(uint * buffer,const uchar * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)354 const uint *QT_FASTCALL fetchARGB32ToARGB32PM_sse4(uint *buffer, const uchar *src, int index, int count,
355                                                   const QVector<QRgb> *, QDitherInfo *)
356 {
357     convertARGBToARGB32PM_sse4<false>(buffer, reinterpret_cast<const uint *>(src) + index, count);
358     return buffer;
359 }
360 
fetchRGBA8888ToARGB32PM_sse4(uint * buffer,const uchar * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)361 const uint *QT_FASTCALL fetchRGBA8888ToARGB32PM_sse4(uint *buffer, const uchar *src, int index, int count,
362                                                      const QVector<QRgb> *, QDitherInfo *)
363 {
364     convertARGBToARGB32PM_sse4<true>(buffer, reinterpret_cast<const uint *>(src) + index, count);
365     return buffer;
366 }
367 
fetchARGB32ToRGBA64PM_sse4(QRgba64 * buffer,const uchar * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)368 const QRgba64 *QT_FASTCALL fetchARGB32ToRGBA64PM_sse4(QRgba64 *buffer, const uchar *src, int index, int count,
369                                                       const QVector<QRgb> *, QDitherInfo *)
370 {
371     convertARGBToRGBA64PM_sse4<false>(buffer, reinterpret_cast<const uint *>(src) + index, count);
372     return buffer;
373 }
374 
fetchRGBA8888ToRGBA64PM_sse4(QRgba64 * buffer,const uchar * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)375 const QRgba64 *QT_FASTCALL fetchRGBA8888ToRGBA64PM_sse4(QRgba64 *buffer, const uchar *src, int index, int count,
376                                                         const QVector<QRgb> *, QDitherInfo *)
377 {
378     convertARGBToRGBA64PM_sse4<true>(buffer, reinterpret_cast<const uint *>(src) + index, count);
379     return buffer;
380 }
381 #endif // __AVX2__
382 
storeRGB32FromARGB32PM_sse4(uchar * dest,const uint * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)383 void QT_FASTCALL storeRGB32FromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count,
384                                              const QVector<QRgb> *, QDitherInfo *)
385 {
386     uint *d = reinterpret_cast<uint *>(dest) + index;
387     convertARGBFromARGB32PM_sse4<false,true>(d, src, count);
388 }
389 
storeARGB32FromARGB32PM_sse4(uchar * dest,const uint * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)390 void QT_FASTCALL storeARGB32FromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count,
391                                               const QVector<QRgb> *, QDitherInfo *)
392 {
393     uint *d = reinterpret_cast<uint *>(dest) + index;
394     convertARGBFromARGB32PM_sse4<false,false>(d, src, count);
395 }
396 
storeRGBA8888FromARGB32PM_sse4(uchar * dest,const uint * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)397 void QT_FASTCALL storeRGBA8888FromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count,
398                                                 const QVector<QRgb> *, QDitherInfo *)
399 {
400     uint *d = reinterpret_cast<uint *>(dest) + index;
401     convertARGBFromARGB32PM_sse4<true,false>(d, src, count);
402 }
403 
storeRGBXFromARGB32PM_sse4(uchar * dest,const uint * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)404 void QT_FASTCALL storeRGBXFromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count,
405                                             const QVector<QRgb> *, QDitherInfo *)
406 {
407     uint *d = reinterpret_cast<uint *>(dest) + index;
408     convertARGBFromARGB32PM_sse4<true,true>(d, src, count);
409 }
410 
411 template<QtPixelOrder PixelOrder>
storeA2RGB30PMFromARGB32PM_sse4(uchar * dest,const uint * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)412 void QT_FASTCALL storeA2RGB30PMFromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count,
413                                                  const QVector<QRgb> *, QDitherInfo *)
414 {
415     uint *d = reinterpret_cast<uint *>(dest) + index;
416     for (int i = 0; i < count; ++i)
417         d[i] = qConvertArgb32ToA2rgb30_sse4<PixelOrder>(src[i]);
418 }
419 
420 #if QT_CONFIG(raster_64bit)
destStore64ARGB32_sse4(QRasterBuffer * rasterBuffer,int x,int y,const QRgba64 * buffer,int length)421 void QT_FASTCALL destStore64ARGB32_sse4(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length)
422 {
423     uint *dest = (uint*)rasterBuffer->scanLine(y) + x;
424     convertARGBFromRGBA64PM_sse4<false>(dest, buffer, length);
425 }
426 
destStore64RGBA8888_sse4(QRasterBuffer * rasterBuffer,int x,int y,const QRgba64 * buffer,int length)427 void QT_FASTCALL destStore64RGBA8888_sse4(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length)
428 {
429     uint *dest = (uint*)rasterBuffer->scanLine(y) + x;
430     convertARGBFromRGBA64PM_sse4<true>(dest, buffer, length);
431 }
432 #endif
433 
storeARGB32FromRGBA64PM_sse4(uchar * dest,const QRgba64 * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)434 void QT_FASTCALL storeARGB32FromRGBA64PM_sse4(uchar *dest, const QRgba64 *src, int index, int count,
435                                               const QVector<QRgb> *, QDitherInfo *)
436 {
437     uint *d = (uint*)dest + index;
438     convertARGBFromRGBA64PM_sse4<false>(d, src, count);
439 }
440 
storeRGBA8888FromRGBA64PM_sse4(uchar * dest,const QRgba64 * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)441 void QT_FASTCALL storeRGBA8888FromRGBA64PM_sse4(uchar *dest, const QRgba64 *src, int index, int count,
442                                                 const QVector<QRgb> *, QDitherInfo *)
443 {
444     uint *d = (uint*)dest + index;
445     convertARGBFromRGBA64PM_sse4<true>(d, src, count);
446 }
447 
448 template
449 void QT_FASTCALL storeA2RGB30PMFromARGB32PM_sse4<PixelOrderBGR>(uchar *dest, const uint *src, int index, int count,
450                                                                 const QVector<QRgb> *, QDitherInfo *);
451 template
452 void QT_FASTCALL storeA2RGB30PMFromARGB32PM_sse4<PixelOrderRGB>(uchar *dest, const uint *src, int index, int count,
453                                                                 const QVector<QRgb> *, QDitherInfo *);
454 
455 QT_END_NAMESPACE
456 
457 #endif
458