1 /****************************************************************************
2 **
3 ** Copyright (C) 2018 The Qt Company Ltd.
4 ** Copyright (C) 2018 Intel Corporation.
5 ** Contact: https://www.qt.io/licensing/
6 **
7 ** This file is part of the QtGui module of the Qt Toolkit.
8 **
9 ** $QT_BEGIN_LICENSE:LGPL$
10 ** Commercial License Usage
11 ** Licensees holding valid commercial Qt licenses may use this file in
12 ** accordance with the commercial license agreement provided with the
13 ** Software or, alternatively, in accordance with the terms contained in
14 ** a written agreement between you and The Qt Company. For licensing terms
15 ** and conditions see https://www.qt.io/terms-conditions. For further
16 ** information use the contact form at https://www.qt.io/contact-us.
17 **
18 ** GNU Lesser General Public License Usage
19 ** Alternatively, this file may be used under the terms of the GNU Lesser
20 ** General Public License version 3 as published by the Free Software
21 ** Foundation and appearing in the file LICENSE.LGPL3 included in the
22 ** packaging of this file. Please review the following information to
23 ** ensure the GNU Lesser General Public License version 3 requirements
24 ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
25 **
26 ** GNU General Public License Usage
27 ** Alternatively, this file may be used under the terms of the GNU
28 ** General Public License version 2.0 or (at your option) the GNU General
29 ** Public license version 3 or any later version approved by the KDE Free
30 ** Qt Foundation. The licenses are as published by the Free Software
31 ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
32 ** included in the packaging of this file. Please review the following
33 ** information to ensure the GNU General Public License requirements will
34 ** be met: https://www.gnu.org/licenses/gpl-2.0.html and
35 ** https://www.gnu.org/licenses/gpl-3.0.html.
36 **
37 ** $QT_END_LICENSE$
38 **
39 ****************************************************************************/
40 
41 #include <qglobal.h>
42 
43 #include <qstylehints.h>
44 #include <qguiapplication.h>
45 #include <qatomic.h>
46 #include <private/qcolortrclut_p.h>
47 #include <private/qdrawhelper_p.h>
48 #include <private/qpaintengine_raster_p.h>
49 #include <private/qpainter_p.h>
50 #include <private/qdrawhelper_x86_p.h>
51 #include <private/qdrawingprimitive_sse2_p.h>
52 #include <private/qdrawhelper_neon_p.h>
53 #if defined(QT_COMPILER_SUPPORTS_MIPS_DSP) || defined(QT_COMPILER_SUPPORTS_MIPS_DSPR2)
54 #include <private/qdrawhelper_mips_dsp_p.h>
55 #endif
56 #include <private/qguiapplication_p.h>
57 #include <private/qrgba64_p.h>
58 #include <qendian.h>
59 #include <qloggingcategory.h>
60 #include <qmath.h>
61 
62 QT_BEGIN_NAMESPACE
63 
64 Q_LOGGING_CATEGORY(lcQtGuiDrawHelper, "qt.gui.drawhelper")
65 
66 #define MASK(src, a) src = BYTE_MUL(src, a)
67 
/*
  Constants and structures.
*/

// half_point is 2^15 and fixed_scale is 2^16, i.e. exactly twice half_point.
// NOTE(review): presumably the unit and rounding term of a 16.16 fixed-point
// representation -- confirm against the users of these constants.
enum {
    half_point = 0x8000,
    fixed_scale = 2 * half_point
};
76 
77 template<QImage::Format> Q_DECL_CONSTEXPR uint redWidth();
78 template<QImage::Format> Q_DECL_CONSTEXPR uint redShift();
79 template<QImage::Format> Q_DECL_CONSTEXPR uint greenWidth();
80 template<QImage::Format> Q_DECL_CONSTEXPR uint greenShift();
81 template<QImage::Format> Q_DECL_CONSTEXPR uint blueWidth();
82 template<QImage::Format> Q_DECL_CONSTEXPR uint blueShift();
83 template<QImage::Format> Q_DECL_CONSTEXPR uint alphaWidth();
84 template<QImage::Format> Q_DECL_CONSTEXPR uint alphaShift();
85 
redWidth()86 template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_RGB16>() { return 5; }
redWidth()87 template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_RGB444>() { return 4; }
redWidth()88 template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_RGB555>() { return 5; }
redWidth()89 template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_RGB666>() { return 6; }
redWidth()90 template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_RGB888>() { return 8; }
redWidth()91 template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_BGR888>() { return 8; }
redWidth()92 template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_ARGB4444_Premultiplied>() { return 4; }
redWidth()93 template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_ARGB8555_Premultiplied>() { return 5; }
redWidth()94 template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_ARGB8565_Premultiplied>() { return 5; }
redWidth()95 template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_ARGB6666_Premultiplied>() { return 6; }
redWidth()96 template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_RGBX8888>() { return 8; }
redWidth()97 template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_RGBA8888>() { return 8; }
redWidth()98 template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_RGBA8888_Premultiplied>() { return 8; }
99 
redShift()100 template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGB16>() { return  11; }
redShift()101 template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGB444>() { return  8; }
redShift()102 template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGB555>() { return 10; }
redShift()103 template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGB666>() { return 12; }
redShift()104 template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGB888>() { return 16; }
redShift()105 template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_BGR888>() { return 0; }
redShift()106 template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_ARGB4444_Premultiplied>() { return  8; }
redShift()107 template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_ARGB8555_Premultiplied>() { return 18; }
redShift()108 template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_ARGB8565_Premultiplied>() { return 19; }
redShift()109 template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_ARGB6666_Premultiplied>() { return 12; }
110 #if Q_BYTE_ORDER == Q_BIG_ENDIAN
redShift()111 template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGBX8888>() { return 24; }
redShift()112 template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGBA8888>() { return 24; }
redShift()113 template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGBA8888_Premultiplied>() { return 24; }
114 #else
redShift()115 template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGBX8888>() { return 0; }
redShift()116 template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGBA8888>() { return 0; }
redShift()117 template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGBA8888_Premultiplied>() { return 0; }
118 #endif
greenWidth()119 template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGB16>() { return 6; }
greenWidth()120 template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGB444>() { return 4; }
greenWidth()121 template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGB555>() { return 5; }
greenWidth()122 template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGB666>() { return 6; }
greenWidth()123 template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGB888>() { return 8; }
greenWidth()124 template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_BGR888>() { return 8; }
greenWidth()125 template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_ARGB4444_Premultiplied>() { return 4; }
greenWidth()126 template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_ARGB8555_Premultiplied>() { return 5; }
greenWidth()127 template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_ARGB8565_Premultiplied>() { return 6; }
greenWidth()128 template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_ARGB6666_Premultiplied>() { return 6; }
greenWidth()129 template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGBX8888>() { return 8; }
greenWidth()130 template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGBA8888>() { return 8; }
greenWidth()131 template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGBA8888_Premultiplied>() { return 8; }
132 
greenShift()133 template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGB16>() { return  5; }
greenShift()134 template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGB444>() { return 4; }
greenShift()135 template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGB555>() { return 5; }
greenShift()136 template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGB666>() { return 6; }
greenShift()137 template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGB888>() { return 8; }
greenShift()138 template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_BGR888>() { return 8; }
greenShift()139 template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_ARGB4444_Premultiplied>() { return  4; }
greenShift()140 template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_ARGB8555_Premultiplied>() { return 13; }
greenShift()141 template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_ARGB8565_Premultiplied>() { return 13; }
greenShift()142 template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_ARGB6666_Premultiplied>() { return  6; }
143 #if Q_BYTE_ORDER == Q_BIG_ENDIAN
greenShift()144 template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGBX8888>() { return 16; }
greenShift()145 template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGBA8888>() { return 16; }
greenShift()146 template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGBA8888_Premultiplied>() { return 16; }
147 #else
greenShift()148 template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGBX8888>() { return 8; }
greenShift()149 template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGBA8888>() { return 8; }
greenShift()150 template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGBA8888_Premultiplied>() { return 8; }
151 #endif
blueWidth()152 template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGB16>() { return 5; }
blueWidth()153 template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGB444>() { return 4; }
blueWidth()154 template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGB555>() { return 5; }
blueWidth()155 template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGB666>() { return 6; }
blueWidth()156 template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGB888>() { return 8; }
blueWidth()157 template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_BGR888>() { return 8; }
blueWidth()158 template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_ARGB4444_Premultiplied>() { return 4; }
blueWidth()159 template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_ARGB8555_Premultiplied>() { return 5; }
blueWidth()160 template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_ARGB8565_Premultiplied>() { return 5; }
blueWidth()161 template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_ARGB6666_Premultiplied>() { return 6; }
blueWidth()162 template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGBX8888>() { return 8; }
blueWidth()163 template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGBA8888>() { return 8; }
blueWidth()164 template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGBA8888_Premultiplied>() { return 8; }
165 
blueShift()166 template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGB16>() { return 0; }
blueShift()167 template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGB444>() { return 0; }
blueShift()168 template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGB555>() { return 0; }
blueShift()169 template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGB666>() { return 0; }
blueShift()170 template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGB888>() { return 0; }
blueShift()171 template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_BGR888>() { return 16; }
blueShift()172 template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_ARGB4444_Premultiplied>() { return 0; }
blueShift()173 template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_ARGB8555_Premultiplied>() { return 8; }
blueShift()174 template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_ARGB8565_Premultiplied>() { return 8; }
blueShift()175 template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_ARGB6666_Premultiplied>() { return 0; }
176 #if Q_BYTE_ORDER == Q_BIG_ENDIAN
blueShift()177 template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGBX8888>() { return 8; }
blueShift()178 template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGBA8888>() { return 8; }
blueShift()179 template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGBA8888_Premultiplied>() { return 8; }
180 #else
blueShift()181 template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGBX8888>() { return 16; }
blueShift()182 template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGBA8888>() { return 16; }
blueShift()183 template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGBA8888_Premultiplied>() { return 16; }
184 #endif
alphaWidth()185 template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGB16>() { return 0; }
alphaWidth()186 template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGB444>() { return 0; }
alphaWidth()187 template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGB555>() { return 0; }
alphaWidth()188 template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGB666>() { return 0; }
alphaWidth()189 template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGB888>() { return 0; }
alphaWidth()190 template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_BGR888>() { return 0; }
alphaWidth()191 template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_ARGB4444_Premultiplied>() { return  4; }
alphaWidth()192 template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_ARGB8555_Premultiplied>() { return  8; }
alphaWidth()193 template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_ARGB8565_Premultiplied>() { return  8; }
alphaWidth()194 template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_ARGB6666_Premultiplied>() { return  6; }
alphaWidth()195 template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGBX8888>() { return 0; }
alphaWidth()196 template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGBA8888>() { return 8; }
alphaWidth()197 template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGBA8888_Premultiplied>() { return 8; }
198 
alphaShift()199 template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGB16>() { return 0; }
alphaShift()200 template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGB444>() { return 0; }
alphaShift()201 template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGB555>() { return 0; }
alphaShift()202 template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGB666>() { return 0; }
alphaShift()203 template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGB888>() { return 0; }
alphaShift()204 template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_BGR888>() { return 0; }
alphaShift()205 template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_ARGB4444_Premultiplied>() { return 12; }
alphaShift()206 template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_ARGB8555_Premultiplied>() { return  0; }
alphaShift()207 template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_ARGB8565_Premultiplied>() { return  0; }
alphaShift()208 template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_ARGB6666_Premultiplied>() { return 18; }
209 #if Q_BYTE_ORDER == Q_BIG_ENDIAN
alphaShift()210 template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGBX8888>() { return 0; }
alphaShift()211 template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGBA8888>() { return 0; }
alphaShift()212 template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGBA8888_Premultiplied>() { return 0; }
213 #else
alphaShift()214 template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGBX8888>() { return 24; }
alphaShift()215 template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGBA8888>() { return 24; }
alphaShift()216 template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGBA8888_Premultiplied>() { return 24; }
217 #endif
218 
219 template<QImage::Format> constexpr QPixelLayout::BPP bitsPerPixel();
bitsPerPixel()220 template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGB16>() { return QPixelLayout::BPP16; }
bitsPerPixel()221 template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGB444>() { return QPixelLayout::BPP16; }
bitsPerPixel()222 template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGB555>() { return QPixelLayout::BPP16; }
bitsPerPixel()223 template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGB666>() { return QPixelLayout::BPP24; }
bitsPerPixel()224 template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGB888>() { return QPixelLayout::BPP24; }
bitsPerPixel()225 template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_BGR888>() { return QPixelLayout::BPP24; }
bitsPerPixel()226 template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_ARGB4444_Premultiplied>() { return QPixelLayout::BPP16; }
bitsPerPixel()227 template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_ARGB8555_Premultiplied>() { return QPixelLayout::BPP24; }
bitsPerPixel()228 template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_ARGB8565_Premultiplied>() { return QPixelLayout::BPP24; }
bitsPerPixel()229 template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_ARGB6666_Premultiplied>() { return QPixelLayout::BPP24; }
bitsPerPixel()230 template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGBX8888>() { return QPixelLayout::BPP32; }
bitsPerPixel()231 template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGBA8888>() { return QPixelLayout::BPP32; }
bitsPerPixel()232 template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGBA8888_Premultiplied>() { return QPixelLayout::BPP32; }
233 
234 
235 typedef const uint *(QT_FASTCALL *FetchPixelsFunc)(uint *buffer, const uchar *src, int index, int count);
236 
237 template <QPixelLayout::BPP bpp> static
fetchPixel(const uchar *,int)238 uint QT_FASTCALL fetchPixel(const uchar *, int)
239 {
240     Q_UNREACHABLE();
241     return 0;
242 }
243 
244 template <>
fetchPixel(const uchar * src,int index)245 inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP1LSB>(const uchar *src, int index)
246 {
247     return (src[index >> 3] >> (index & 7)) & 1;
248 }
249 
250 template <>
fetchPixel(const uchar * src,int index)251 inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP1MSB>(const uchar *src, int index)
252 {
253     return (src[index >> 3] >> (~index & 7)) & 1;
254 }
255 
256 template <>
fetchPixel(const uchar * src,int index)257 inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP8>(const uchar *src, int index)
258 {
259     return src[index];
260 }
261 
262 template <>
fetchPixel(const uchar * src,int index)263 inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP16>(const uchar *src, int index)
264 {
265     return reinterpret_cast<const quint16 *>(src)[index];
266 }
267 
268 template <>
fetchPixel(const uchar * src,int index)269 inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP24>(const uchar *src, int index)
270 {
271     return reinterpret_cast<const quint24 *>(src)[index];
272 }
273 
274 template <>
fetchPixel(const uchar * src,int index)275 inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP32>(const uchar *src, int index)
276 {
277     return reinterpret_cast<const uint *>(src)[index];
278 }
279 
280 template <>
fetchPixel(const uchar * src,int index)281 inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP64>(const uchar *src, int index)
282 {
283     // We have to do the conversion in fetch to fit into a 32bit uint
284     QRgba64 c = reinterpret_cast<const QRgba64 *>(src)[index];
285     return c.toArgb32();
286 }
287 
288 template <QPixelLayout::BPP bpp>
fetchPixel64(const uchar * src,int index)289 static quint64 QT_FASTCALL fetchPixel64(const uchar *src, int index)
290 {
291     Q_STATIC_ASSERT(bpp != QPixelLayout::BPP64);
292     return fetchPixel<bpp>(src, index);
293 }
294 
295 template <QPixelLayout::BPP width> static
296 void QT_FASTCALL storePixel(uchar *dest, int index, uint pixel);
297 
298 template <>
storePixel(uchar * dest,int index,uint pixel)299 inline void QT_FASTCALL storePixel<QPixelLayout::BPP16>(uchar *dest, int index, uint pixel)
300 {
301     reinterpret_cast<quint16 *>(dest)[index] = quint16(pixel);
302 }
303 
304 template <>
storePixel(uchar * dest,int index,uint pixel)305 inline void QT_FASTCALL storePixel<QPixelLayout::BPP24>(uchar *dest, int index, uint pixel)
306 {
307     reinterpret_cast<quint24 *>(dest)[index] = quint24(pixel);
308 }
309 
310 typedef uint (QT_FASTCALL *FetchPixelFunc)(const uchar *src, int index);
311 
312 static const FetchPixelFunc qFetchPixel[QPixelLayout::BPPCount] = {
313     nullptr, // BPPNone
314     fetchPixel<QPixelLayout::BPP1MSB>, // BPP1MSB
315     fetchPixel<QPixelLayout::BPP1LSB>, // BPP1LSB
316     fetchPixel<QPixelLayout::BPP8>, // BPP8
317     fetchPixel<QPixelLayout::BPP16>, // BPP16
318     fetchPixel<QPixelLayout::BPP24>, // BPP24
319     fetchPixel<QPixelLayout::BPP32>, // BPP32
320     fetchPixel<QPixelLayout::BPP64> // BPP64
321 };
322 
323 template<QImage::Format Format>
convertPixelToRGB32(uint s)324 static Q_ALWAYS_INLINE uint convertPixelToRGB32(uint s)
325 {
326     Q_CONSTEXPR uint redMask = ((1 << redWidth<Format>()) - 1);
327     Q_CONSTEXPR uint greenMask = ((1 << greenWidth<Format>()) - 1);
328     Q_CONSTEXPR uint blueMask = ((1 << blueWidth<Format>()) - 1);
329 
330     Q_CONSTEXPR uchar redLeftShift = 8 - redWidth<Format>();
331     Q_CONSTEXPR uchar greenLeftShift = 8 - greenWidth<Format>();
332     Q_CONSTEXPR uchar blueLeftShift = 8 - blueWidth<Format>();
333 
334     Q_CONSTEXPR uchar redRightShift = 2 * redWidth<Format>() - 8;
335     Q_CONSTEXPR uchar greenRightShift = 2 * greenWidth<Format>() - 8;
336     Q_CONSTEXPR uchar blueRightShift = 2 * blueWidth<Format>() - 8;
337 
338     uint red   = (s >> redShift<Format>()) & redMask;
339     uint green = (s >> greenShift<Format>()) & greenMask;
340     uint blue  = (s >> blueShift<Format>()) & blueMask;
341 
342     red = ((red << redLeftShift) | (red >> redRightShift)) << 16;
343     green = ((green << greenLeftShift) | (green >> greenRightShift)) << 8;
344     blue = (blue << blueLeftShift) | (blue >> blueRightShift);
345     return 0xff000000 | red | green | blue;
346 }
347 
348 template<QImage::Format Format>
convertToRGB32(uint * buffer,int count,const QVector<QRgb> *)349 static void QT_FASTCALL convertToRGB32(uint *buffer, int count, const QVector<QRgb> *)
350 {
351     for (int i = 0; i < count; ++i)
352         buffer[i] = convertPixelToRGB32<Format>(buffer[i]);
353 }
354 
#if defined(__SSE2__) && !defined(__SSSE3__) && QT_COMPILER_SUPPORTS_SSSE3
// Declared only for builds that target SSE2 but not SSSE3 while the compiler
// can still emit SSSE3 code; callers in this file check for SSSE3 at run time.
// NOTE(review): presumably defined in a separately compiled SSSE3 source file.
extern const uint *QT_FASTCALL fetchPixelsBPP24_ssse3(uint *dest, const uchar *src,
                                                      int index, int count);
#endif
358 
359 template<QImage::Format Format>
fetchRGBToRGB32(uint * buffer,const uchar * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)360 static const uint *QT_FASTCALL fetchRGBToRGB32(uint *buffer, const uchar *src, int index, int count,
361                                                const QVector<QRgb> *, QDitherInfo *)
362 {
363     constexpr QPixelLayout::BPP BPP = bitsPerPixel<Format>();
364 #if defined(__SSE2__) && !defined(__SSSE3__) && QT_COMPILER_SUPPORTS_SSSE3
365     if (BPP == QPixelLayout::BPP24 && qCpuHasFeature(SSSE3)) {
366         // With SSE2 can convertToRGB32 be vectorized, but it takes SSSE3
367         // to vectorize the deforested version below.
368         fetchPixelsBPP24_ssse3(buffer, src, index, count);
369         convertToRGB32<Format>(buffer, count, nullptr);
370         return buffer;
371     }
372 #endif
373     for (int i = 0; i < count; ++i)
374         buffer[i] = convertPixelToRGB32<Format>(fetchPixel<BPP>(src, index + i));
375     return buffer;
376 }
377 
378 template<QImage::Format Format>
convertPixelToRGB64(uint s)379 static Q_ALWAYS_INLINE QRgba64 convertPixelToRGB64(uint s)
380 {
381     return QRgba64::fromArgb32(convertPixelToRGB32<Format>(s));
382 }
383 
384 template<QImage::Format Format>
convertToRGB64(QRgba64 * buffer,const uint * src,int count,const QVector<QRgb> *,QDitherInfo *)385 static const QRgba64 *QT_FASTCALL convertToRGB64(QRgba64 *buffer, const uint *src, int count,
386                                                  const QVector<QRgb> *, QDitherInfo *)
387 {
388     for (int i = 0; i < count; ++i)
389         buffer[i] = convertPixelToRGB64<Format>(src[i]);
390     return buffer;
391 }
392 
393 template<QImage::Format Format>
fetchRGBToRGB64(QRgba64 * buffer,const uchar * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)394 static const QRgba64 *QT_FASTCALL fetchRGBToRGB64(QRgba64 *buffer, const uchar *src, int index, int count,
395                                                   const QVector<QRgb> *, QDitherInfo *)
396 {
397     for (int i = 0; i < count; ++i)
398         buffer[i] = convertPixelToRGB64<Format>(fetchPixel<bitsPerPixel<Format>()>(src, index + i));
399     return buffer;
400 }
401 
402 template<QImage::Format Format>
convertPixelToARGB32PM(uint s)403 static Q_ALWAYS_INLINE uint convertPixelToARGB32PM(uint s)
404 {
405     Q_CONSTEXPR uint alphaMask = ((1 << alphaWidth<Format>()) - 1);
406     Q_CONSTEXPR uint redMask = ((1 << redWidth<Format>()) - 1);
407     Q_CONSTEXPR uint greenMask = ((1 << greenWidth<Format>()) - 1);
408     Q_CONSTEXPR uint blueMask = ((1 << blueWidth<Format>()) - 1);
409 
410     Q_CONSTEXPR uchar alphaLeftShift = 8 - alphaWidth<Format>();
411     Q_CONSTEXPR uchar redLeftShift = 8 - redWidth<Format>();
412     Q_CONSTEXPR uchar greenLeftShift = 8 - greenWidth<Format>();
413     Q_CONSTEXPR uchar blueLeftShift = 8 - blueWidth<Format>();
414 
415     Q_CONSTEXPR uchar alphaRightShift = 2 * alphaWidth<Format>() - 8;
416     Q_CONSTEXPR uchar redRightShift = 2 * redWidth<Format>() - 8;
417     Q_CONSTEXPR uchar greenRightShift = 2 * greenWidth<Format>() - 8;
418     Q_CONSTEXPR uchar blueRightShift = 2 * blueWidth<Format>() - 8;
419 
420     Q_CONSTEXPR bool mustMin = (alphaWidth<Format>() != redWidth<Format>()) ||
421                                (alphaWidth<Format>() != greenWidth<Format>()) ||
422                                (alphaWidth<Format>() != blueWidth<Format>());
423 
424     uint alpha = (s >> alphaShift<Format>()) & alphaMask;
425     uint red   = (s >> redShift<Format>()) & redMask;
426     uint green = (s >> greenShift<Format>()) & greenMask;
427     uint blue  = (s >> blueShift<Format>()) & blueMask;
428 
429     alpha = (alpha << alphaLeftShift) | (alpha >> alphaRightShift);
430     red   = (red << redLeftShift) | (red >> redRightShift);
431     green = (green << greenLeftShift) | (green >> greenRightShift);
432     blue  = (blue << blueLeftShift) | (blue >> blueRightShift);
433 
434     if (mustMin) {
435         red   = qMin(alpha, red);
436         green = qMin(alpha, green);
437         blue  = qMin(alpha, blue);
438     }
439 
440     return (alpha << 24) | (red << 16) | (green << 8) | blue;
441 }
442 
443 template<QImage::Format Format>
convertARGBPMToARGB32PM(uint * buffer,int count,const QVector<QRgb> *)444 static void QT_FASTCALL convertARGBPMToARGB32PM(uint *buffer, int count, const QVector<QRgb> *)
445 {
446     for (int i = 0; i < count; ++i)
447         buffer[i] = convertPixelToARGB32PM<Format>(buffer[i]);
448 }
449 
450 template<QImage::Format Format>
fetchARGBPMToARGB32PM(uint * buffer,const uchar * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)451 static const uint *QT_FASTCALL fetchARGBPMToARGB32PM(uint *buffer, const uchar *src, int index, int count,
452                                                      const QVector<QRgb> *, QDitherInfo *)
453 {
454     constexpr QPixelLayout::BPP BPP = bitsPerPixel<Format>();
455 #if defined(__SSE2__) && !defined(__SSSE3__) && QT_COMPILER_SUPPORTS_SSSE3
456     if (BPP == QPixelLayout::BPP24 && qCpuHasFeature(SSSE3)) {
457         // With SSE2 can convertToRGB32 be vectorized, but it takes SSSE3
458         // to vectorize the deforested version below.
459         fetchPixelsBPP24_ssse3(buffer, src, index, count);
460         convertARGBPMToARGB32PM<Format>(buffer, count, nullptr);
461         return buffer;
462     }
463 #endif
464     for (int i = 0; i < count; ++i)
465         buffer[i] = convertPixelToARGB32PM<Format>(fetchPixel<BPP>(src, index + i));
466     return buffer;
467 }
468 
469 template<QImage::Format Format>
convertPixelToRGBA64PM(uint s)470 static Q_ALWAYS_INLINE QRgba64 convertPixelToRGBA64PM(uint s)
471 {
472     return QRgba64::fromArgb32(convertPixelToARGB32PM<Format>(s));
473 }
474 
475 template<QImage::Format Format>
convertARGBPMToRGBA64PM(QRgba64 * buffer,const uint * src,int count,const QVector<QRgb> *,QDitherInfo *)476 static const QRgba64 *QT_FASTCALL convertARGBPMToRGBA64PM(QRgba64 *buffer, const uint *src, int count,
477                                                           const QVector<QRgb> *, QDitherInfo *)
478 {
479     for (int i = 0; i < count; ++i)
480         buffer[i] = convertPixelToRGB64<Format>(src[i]);
481     return buffer;
482 }
483 
484 template<QImage::Format Format>
fetchARGBPMToRGBA64PM(QRgba64 * buffer,const uchar * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)485 static const QRgba64 *QT_FASTCALL fetchARGBPMToRGBA64PM(QRgba64 *buffer, const uchar *src, int index, int count,
486                                                         const QVector<QRgb> *, QDitherInfo *)
487 {
488     constexpr QPixelLayout::BPP bpp = bitsPerPixel<Format>();
489     for (int i = 0; i < count; ++i)
490         buffer[i] = convertPixelToRGBA64PM<Format>(fetchPixel<bpp>(src, index + i));
491     return buffer;
492 }
493 
494 template<QImage::Format Format, bool fromRGB>
storeRGBFromARGB32PM(uchar * dest,const uint * src,int index,int count,const QVector<QRgb> *,QDitherInfo * dither)495 static void QT_FASTCALL storeRGBFromARGB32PM(uchar *dest, const uint *src, int index, int count,
496                                              const QVector<QRgb> *, QDitherInfo *dither)
497 {
498     Q_CONSTEXPR uchar rWidth = redWidth<Format>();
499     Q_CONSTEXPR uchar gWidth = greenWidth<Format>();
500     Q_CONSTEXPR uchar bWidth = blueWidth<Format>();
501     constexpr QPixelLayout::BPP BPP = bitsPerPixel<Format>();
502 
503     // RGB32 -> RGB888 is not a precision loss.
504     if (!dither || (rWidth == 8 && gWidth == 8 && bWidth == 8)) {
505         Q_CONSTEXPR uint rMask = (1 << redWidth<Format>()) - 1;
506         Q_CONSTEXPR uint gMask = (1 << greenWidth<Format>()) - 1;
507         Q_CONSTEXPR uint bMask = (1 << blueWidth<Format>()) - 1;
508         Q_CONSTEXPR uchar rRightShift = 24 - redWidth<Format>();
509         Q_CONSTEXPR uchar gRightShift = 16 - greenWidth<Format>();
510         Q_CONSTEXPR uchar bRightShift =  8 - blueWidth<Format>();
511 
512         for (int i = 0; i < count; ++i) {
513             const uint c = fromRGB ? src[i] : qUnpremultiply(src[i]);
514             const uint r = ((c >> rRightShift) & rMask) << redShift<Format>();
515             const uint g = ((c >> gRightShift) & gMask) << greenShift<Format>();
516             const uint b = ((c >> bRightShift) & bMask) << blueShift<Format>();
517             storePixel<BPP>(dest, index + i, r | g | b);
518         };
519     } else {
520         // We do ordered dither by using a rounding conversion, but instead of
521         // adding half of input precision, we add the adjusted result from the
522         // bayer matrix before narrowing.
523         // Note: Rounding conversion in itself is different from the naive
524         // conversion we do above for non-dithering.
525         const uint *bayer_line = qt_bayer_matrix[dither->y & 15];
526         for (int i = 0; i < count; ++i) {
527             const uint c = fromRGB ? src[i] : qUnpremultiply(src[i]);
528             const int d = bayer_line[(dither->x + i) & 15];
529             const int dr = d - ((d + 1) >> rWidth);
530             const int dg = d - ((d + 1) >> gWidth);
531             const int db = d - ((d + 1) >> bWidth);
532             int r = qRed(c);
533             int g = qGreen(c);
534             int b = qBlue(c);
535             r = (r + ((dr - r) >> rWidth) + 1) >> (8 - rWidth);
536             g = (g + ((dg - g) >> gWidth) + 1) >> (8 - gWidth);
537             b = (b + ((db - b) >> bWidth) + 1) >> (8 - bWidth);
538             const uint s = (r << redShift<Format>())
539                          | (g << greenShift<Format>())
540                          | (b << blueShift<Format>());
541             storePixel<BPP>(dest, index + i, s);
542         }
543     }
544 }
545 
546 template<QImage::Format Format, bool fromRGB>
storeARGBPMFromARGB32PM(uchar * dest,const uint * src,int index,int count,const QVector<QRgb> *,QDitherInfo * dither)547 static void QT_FASTCALL storeARGBPMFromARGB32PM(uchar *dest, const uint *src, int index, int count,
548                                                 const QVector<QRgb> *, QDitherInfo *dither)
549 {
550     constexpr QPixelLayout::BPP BPP = bitsPerPixel<Format>();
551     if (!dither) {
552         Q_CONSTEXPR uint aMask = (1 << alphaWidth<Format>()) - 1;
553         Q_CONSTEXPR uint rMask = (1 << redWidth<Format>()) - 1;
554         Q_CONSTEXPR uint gMask = (1 << greenWidth<Format>()) - 1;
555         Q_CONSTEXPR uint bMask = (1 << blueWidth<Format>()) - 1;
556 
557         Q_CONSTEXPR uchar aRightShift = 32 - alphaWidth<Format>();
558         Q_CONSTEXPR uchar rRightShift = 24 - redWidth<Format>();
559         Q_CONSTEXPR uchar gRightShift = 16 - greenWidth<Format>();
560         Q_CONSTEXPR uchar bRightShift =  8 - blueWidth<Format>();
561 
562         Q_CONSTEXPR uint aOpaque = aMask << alphaShift<Format>();
563         for (int i = 0; i < count; ++i) {
564             const uint c = src[i];
565             const uint a = fromRGB ? aOpaque : (((c >> aRightShift) & aMask) << alphaShift<Format>());
566             const uint r = ((c >> rRightShift) & rMask) << redShift<Format>();
567             const uint g = ((c >> gRightShift) & gMask) << greenShift<Format>();
568             const uint b = ((c >> bRightShift) & bMask) << blueShift<Format>();
569             storePixel<BPP>(dest, index + i, a | r | g | b);
570         };
571     } else {
572         Q_CONSTEXPR uchar aWidth = alphaWidth<Format>();
573         Q_CONSTEXPR uchar rWidth = redWidth<Format>();
574         Q_CONSTEXPR uchar gWidth = greenWidth<Format>();
575         Q_CONSTEXPR uchar bWidth = blueWidth<Format>();
576 
577         const uint *bayer_line = qt_bayer_matrix[dither->y & 15];
578         for (int i = 0; i < count; ++i) {
579             const uint c = src[i];
580             const int d = bayer_line[(dither->x + i) & 15];
581             const int da = d - ((d + 1) >> aWidth);
582             const int dr = d - ((d + 1) >> rWidth);
583             const int dg = d - ((d + 1) >> gWidth);
584             const int db = d - ((d + 1) >> bWidth);
585             int a = qAlpha(c);
586             int r = qRed(c);
587             int g = qGreen(c);
588             int b = qBlue(c);
589             if (fromRGB)
590                 a = (1 << aWidth) - 1;
591             else
592                 a = (a + ((da - a) >> aWidth) + 1) >> (8 - aWidth);
593             r = (r + ((dr - r) >> rWidth) + 1) >> (8 - rWidth);
594             g = (g + ((dg - g) >> gWidth) + 1) >> (8 - gWidth);
595             b = (b + ((db - b) >> bWidth) + 1) >> (8 - bWidth);
596             uint s = (a << alphaShift<Format>())
597                    | (r << redShift<Format>())
598                    | (g << greenShift<Format>())
599                    | (b << blueShift<Format>());
600             storePixel<BPP>(dest, index + i, s);
601         }
602     }
603 }
604 
605 template<QImage::Format Format>
rbSwap(uchar * dst,const uchar * src,int count)606 static void QT_FASTCALL rbSwap(uchar *dst, const uchar *src, int count)
607 {
608     Q_CONSTEXPR uchar aWidth = alphaWidth<Format>();
609     Q_CONSTEXPR uchar aShift = alphaShift<Format>();
610     Q_CONSTEXPR uchar rWidth = redWidth<Format>();
611     Q_CONSTEXPR uchar rShift = redShift<Format>();
612     Q_CONSTEXPR uchar gWidth = greenWidth<Format>();
613     Q_CONSTEXPR uchar gShift = greenShift<Format>();
614     Q_CONSTEXPR uchar bWidth = blueWidth<Format>();
615     Q_CONSTEXPR uchar bShift = blueShift<Format>();
616 #ifdef Q_COMPILER_CONSTEXPR
617     Q_STATIC_ASSERT(rWidth == bWidth);
618 #endif
619     Q_CONSTEXPR uint redBlueMask = (1 << rWidth) - 1;
620     Q_CONSTEXPR uint alphaGreenMask = (((1 << aWidth) - 1) << aShift)
621                                     | (((1 << gWidth) - 1) << gShift);
622     constexpr QPixelLayout::BPP bpp = bitsPerPixel<Format>();
623 
624     for (int i = 0; i < count; ++i) {
625         const uint c = fetchPixel<bpp>(src, i);
626         const uint r = (c >> rShift) & redBlueMask;
627         const uint b = (c >> bShift) & redBlueMask;
628         const uint t = (c & alphaGreenMask)
629                      | (r << bShift)
630                      | (b << rShift);
631         storePixel<bpp>(dst, i, t);
632     }
633 }
634 
rbSwap_rgb32(uchar * d,const uchar * s,int count)635 static void QT_FASTCALL rbSwap_rgb32(uchar *d, const uchar *s, int count)
636 {
637     const uint *src = reinterpret_cast<const uint *>(s);
638     uint *dest = reinterpret_cast<uint *>(d);
639     for (int i = 0; i < count; ++i) {
640         const uint c = src[i];
641         const uint ag = c & 0xff00ff00;
642         const uint rb = c & 0x00ff00ff;
643         dest[i] = ag | (rb << 16) | (rb >> 16);
644     }
645 }
646 
647 #if Q_BYTE_ORDER == Q_LITTLE_ENDIAN
648 template<>
rbSwap(uchar * d,const uchar * s,int count)649 void QT_FASTCALL rbSwap<QImage::Format_RGBA8888>(uchar *d, const uchar *s, int count)
650 {
651     return rbSwap_rgb32(d, s, count);
652 }
653 #else
654 template<>
rbSwap(uchar * d,const uchar * s,int count)655 void QT_FASTCALL rbSwap<QImage::Format_RGBA8888>(uchar *d, const uchar *s, int count)
656 {
657     const uint *src = reinterpret_cast<const uint *>(s);
658     uint *dest = reinterpret_cast<uint *>(d);
659     for (int i = 0; i < count; ++i) {
660         const uint c = src[i];
661         const uint rb = c & 0xff00ff00;
662         const uint ga = c & 0x00ff00ff;
663         dest[i] = ga | (rb << 16) | (rb >> 16);
664     }
665 }
666 #endif
667 
rbSwap_rgb30(uchar * d,const uchar * s,int count)668 static void QT_FASTCALL rbSwap_rgb30(uchar *d, const uchar *s, int count)
669 {
670     const uint *src = reinterpret_cast<const uint *>(s);
671     uint *dest = reinterpret_cast<uint *>(d);
672     UNALIASED_CONVERSION_LOOP(dest, src, count, qRgbSwapRgb30);
673 }
674 
// Builds the QPixelLayout entry for the RGB format \a Format out of the
// generic per-format template helpers.
// NOTE(review): the initializer is positional, so the meaning of each entry is
// fixed by the QPixelLayout declaration (not visible here). The two leading
// 'false' values are presumably the "has alpha" and "premultiplied" flags --
// confirm against the struct before reordering anything.
template<QImage::Format Format> Q_DECL_CONSTEXPR static inline QPixelLayout pixelLayoutRGB()
{
    return QPixelLayout{
        false,
        false,
        bitsPerPixel<Format>(),
        rbSwap<Format>,
        convertToRGB32<Format>,
        convertToRGB64<Format>,
        fetchRGBToRGB32<Format>,
        fetchRGBToRGB64<Format>,
        // fromRGB = false: the store routine unpremultiplies each source pixel first.
        storeRGBFromARGB32PM<Format, false>,
        // fromRGB = true: the store routine uses the source pixels as they are.
        storeRGBFromARGB32PM<Format, true>
    };
}
690 
// Builds the QPixelLayout entry for the premultiplied-alpha format \a Format
// out of the generic per-format template helpers.
// NOTE(review): the initializer is positional, so the meaning of each entry is
// fixed by the QPixelLayout declaration (not visible here). The two leading
// 'true' values are presumably the "has alpha" and "premultiplied" flags --
// confirm against the struct before reordering anything.
template<QImage::Format Format> Q_DECL_CONSTEXPR static inline QPixelLayout pixelLayoutARGBPM()
{
    return QPixelLayout{
        true,
        true,
        bitsPerPixel<Format>(),
        rbSwap<Format>,
        convertARGBPMToARGB32PM<Format>,
        convertARGBPMToRGBA64PM<Format>,
        fetchARGBPMToARGB32PM<Format>,
        fetchARGBPMToRGBA64PM<Format>,
        // fromRGB = false: the source alpha is narrowed and stored.
        storeARGBPMFromARGB32PM<Format, false>,
        // fromRGB = true: the source alpha is ignored, all alpha bits are set.
        storeARGBPMFromARGB32PM<Format, true>
    };
}
706 
convertIndexedToARGB32PM(uint * buffer,int count,const QVector<QRgb> * clut)707 static void QT_FASTCALL convertIndexedToARGB32PM(uint *buffer, int count, const QVector<QRgb> *clut)
708 {
709     for (int i = 0; i < count; ++i)
710         buffer[i] = qPremultiply(clut->at(buffer[i]));
711 }
712 
713 template<QPixelLayout::BPP BPP>
fetchIndexedToARGB32PM(uint * buffer,const uchar * src,int index,int count,const QVector<QRgb> * clut,QDitherInfo *)714 static const uint *QT_FASTCALL fetchIndexedToARGB32PM(uint *buffer, const uchar *src, int index, int count,
715                                                       const QVector<QRgb> *clut, QDitherInfo *)
716 {
717     for (int i = 0; i < count; ++i) {
718         const uint s = fetchPixel<BPP>(src, index + i);
719         buffer[i] = qPremultiply(clut->at(s));
720     }
721     return buffer;
722 }
723 
724 template<QPixelLayout::BPP BPP>
fetchIndexedToRGBA64PM(QRgba64 * buffer,const uchar * src,int index,int count,const QVector<QRgb> * clut,QDitherInfo *)725 static const QRgba64 *QT_FASTCALL fetchIndexedToRGBA64PM(QRgba64 *buffer, const uchar *src, int index, int count,
726                                                          const QVector<QRgb> *clut, QDitherInfo *)
727 {
728     for (int i = 0; i < count; ++i) {
729         const uint s = fetchPixel<BPP>(src, index + i);
730         buffer[i] = QRgba64::fromArgb32(clut->at(s)).premultiplied();
731     }
732     return buffer;
733 }
734 
convertIndexedToRGBA64PM(QRgba64 * buffer,const uint * src,int count,const QVector<QRgb> * clut,QDitherInfo *)735 static const QRgba64 *QT_FASTCALL convertIndexedToRGBA64PM(QRgba64 *buffer, const uint *src, int count,
736                                                            const QVector<QRgb> *clut, QDitherInfo *)
737 {
738     for (int i = 0; i < count; ++i)
739         buffer[i] = QRgba64::fromArgb32(clut->at(src[i])).premultiplied();
740     return buffer;
741 }
742 
// No-op in-place converter: leaves the buffer untouched. All three arguments
// (buffer, count, colour table) are ignored.
static void QT_FASTCALL convertPassThrough(uint *, int, const QVector<QRgb> *)
{
}
746 
fetchPassThrough(uint *,const uchar * src,int index,int,const QVector<QRgb> *,QDitherInfo *)747 static const uint *QT_FASTCALL fetchPassThrough(uint *, const uchar *src, int index, int,
748                                                 const QVector<QRgb> *, QDitherInfo *)
749 {
750     return reinterpret_cast<const uint *>(src) + index;
751 }
752 
fetchPassThrough64(QRgba64 *,const uchar * src,int index,int,const QVector<QRgb> *,QDitherInfo *)753 static const QRgba64 *QT_FASTCALL fetchPassThrough64(QRgba64 *, const uchar *src, int index, int,
754                                                      const QVector<QRgb> *, QDitherInfo *)
755 {
756     return reinterpret_cast<const QRgba64 *>(src) + index;
757 }
758 
storePassThrough(uchar * dest,const uint * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)759 static void QT_FASTCALL storePassThrough(uchar *dest, const uint *src, int index, int count,
760                                          const QVector<QRgb> *, QDitherInfo *)
761 {
762     uint *d = reinterpret_cast<uint *>(dest) + index;
763     if (d != src)
764         memcpy(d, src, count * sizeof(uint));
765 }
766 
convertARGB32ToARGB32PM(uint * buffer,int count,const QVector<QRgb> *)767 static void QT_FASTCALL convertARGB32ToARGB32PM(uint *buffer, int count, const QVector<QRgb> *)
768 {
769     qt_convertARGB32ToARGB32PM(buffer, buffer, count);
770 }
771 
fetchARGB32ToARGB32PM(uint * buffer,const uchar * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)772 static const uint *QT_FASTCALL fetchARGB32ToARGB32PM(uint *buffer, const uchar *src, int index, int count,
773                                                      const QVector<QRgb> *, QDitherInfo *)
774 {
775     return qt_convertARGB32ToARGB32PM(buffer, reinterpret_cast<const uint *>(src) + index, count);
776 }
777 
convertRGBA8888PMToARGB32PM(uint * buffer,int count,const QVector<QRgb> *)778 static void QT_FASTCALL convertRGBA8888PMToARGB32PM(uint *buffer, int count, const QVector<QRgb> *)
779 {
780     for (int i = 0; i < count; ++i)
781         buffer[i] = RGBA2ARGB(buffer[i]);
782 }
783 
fetchRGBA8888PMToARGB32PM(uint * buffer,const uchar * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)784 static const uint *QT_FASTCALL fetchRGBA8888PMToARGB32PM(uint *buffer, const uchar *src, int index, int count,
785                                                          const QVector<QRgb> *, QDitherInfo *)
786 {
787     const uint *s  = reinterpret_cast<const uint *>(src) + index;
788     UNALIASED_CONVERSION_LOOP(buffer, s, count, RGBA2ARGB);
789     return buffer;
790 }
791 
convertRGBA8888ToARGB32PM(uint * buffer,int count,const QVector<QRgb> *)792 static void QT_FASTCALL convertRGBA8888ToARGB32PM(uint *buffer, int count, const QVector<QRgb> *)
793 {
794     qt_convertRGBA8888ToARGB32PM(buffer, buffer, count);
795 }
796 
fetchRGBA8888ToARGB32PM(uint * buffer,const uchar * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)797 static const uint *QT_FASTCALL fetchRGBA8888ToARGB32PM(uint *buffer, const uchar *src, int index, int count,
798                                                        const QVector<QRgb> *, QDitherInfo *)
799 {
800     return qt_convertRGBA8888ToARGB32PM(buffer, reinterpret_cast<const uint *>(src) + index, count);
801 }
802 
convertAlpha8ToRGB32(uint * buffer,int count,const QVector<QRgb> *)803 static void QT_FASTCALL convertAlpha8ToRGB32(uint *buffer, int count, const QVector<QRgb> *)
804 {
805     for (int i = 0; i < count; ++i)
806         buffer[i] = qRgba(0, 0, 0, buffer[i]);
807 }
808 
fetchAlpha8ToRGB32(uint * buffer,const uchar * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)809 static const uint *QT_FASTCALL fetchAlpha8ToRGB32(uint *buffer, const uchar *src, int index, int count,
810                                                   const QVector<QRgb> *, QDitherInfo *)
811 {
812     for (int i = 0; i < count; ++i)
813         buffer[i] = qRgba(0, 0, 0, src[index + i]);
814     return buffer;
815 }
816 
convertAlpha8ToRGB64(QRgba64 * buffer,const uint * src,int count,const QVector<QRgb> *,QDitherInfo *)817 static const QRgba64 *QT_FASTCALL convertAlpha8ToRGB64(QRgba64 *buffer, const uint *src, int count,
818                                                        const QVector<QRgb> *, QDitherInfo *)
819 {
820     for (int i = 0; i < count; ++i)
821         buffer[i] = QRgba64::fromRgba(0, 0, 0, src[i]);
822     return buffer;
823 }
fetchAlpha8ToRGB64(QRgba64 * buffer,const uchar * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)824 static const QRgba64 *QT_FASTCALL fetchAlpha8ToRGB64(QRgba64 *buffer, const uchar *src, int index, int count,
825                                                      const QVector<QRgb> *, QDitherInfo *)
826 {
827     for (int i = 0; i < count; ++i)
828         buffer[i] = QRgba64::fromRgba(0, 0, 0, src[index + i]);
829     return buffer;
830 }
831 
convertGrayscale8ToRGB32(uint * buffer,int count,const QVector<QRgb> *)832 static void QT_FASTCALL convertGrayscale8ToRGB32(uint *buffer, int count, const QVector<QRgb> *)
833 {
834     for (int i = 0; i < count; ++i) {
835         const uint s = buffer[i];
836         buffer[i] = qRgb(s, s, s);
837     }
838 }
839 
fetchGrayscale8ToRGB32(uint * buffer,const uchar * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)840 static const uint *QT_FASTCALL fetchGrayscale8ToRGB32(uint *buffer, const uchar *src, int index, int count,
841                                                       const QVector<QRgb> *, QDitherInfo *)
842 {
843     for (int i = 0; i < count; ++i) {
844         const uint s = src[index + i];
845         buffer[i] = qRgb(s, s, s);
846     }
847     return buffer;
848 }
849 
convertGrayscale8ToRGB64(QRgba64 * buffer,const uint * src,int count,const QVector<QRgb> *,QDitherInfo *)850 static const QRgba64 *QT_FASTCALL convertGrayscale8ToRGB64(QRgba64 *buffer, const uint *src, int count,
851                                                            const QVector<QRgb> *, QDitherInfo *)
852 {
853     for (int i = 0; i < count; ++i)
854         buffer[i] = QRgba64::fromRgba(src[i], src[i], src[i], 255);
855     return buffer;
856 }
857 
fetchGrayscale8ToRGB64(QRgba64 * buffer,const uchar * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)858 static const QRgba64 *QT_FASTCALL fetchGrayscale8ToRGB64(QRgba64 *buffer, const uchar *src, int index, int count,
859                                                          const QVector<QRgb> *, QDitherInfo *)
860 {
861     for (int i = 0; i < count; ++i) {
862         const uint s = src[index + i];
863         buffer[i] = QRgba64::fromRgba(s, s, s, 255);
864     }
865     return buffer;
866 }
867 
convertGrayscale16ToRGB32(uint * buffer,int count,const QVector<QRgb> *)868 static void QT_FASTCALL convertGrayscale16ToRGB32(uint *buffer, int count, const QVector<QRgb> *)
869 {
870     for (int i = 0; i < count; ++i) {
871         const uint x = qt_div_257(buffer[i]);
872         buffer[i] = qRgb(x, x, x);
873     }
874 }
875 
fetchGrayscale16ToRGB32(uint * buffer,const uchar * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)876 static const uint *QT_FASTCALL fetchGrayscale16ToRGB32(uint *buffer, const uchar *src, int index, int count,
877                                                       const QVector<QRgb> *, QDitherInfo *)
878 {
879     const unsigned short *s = reinterpret_cast<const unsigned short *>(src) + index;
880     for (int i = 0; i < count; ++i) {
881         const uint x = qt_div_257(s[i]);
882         buffer[i] = qRgb(x, x, x);
883     }
884     return buffer;
885 }
886 
convertGrayscale16ToRGBA64(QRgba64 * buffer,const uint * src,int count,const QVector<QRgb> *,QDitherInfo *)887 static const QRgba64 *QT_FASTCALL convertGrayscale16ToRGBA64(QRgba64 *buffer, const uint *src, int count,
888                                                            const QVector<QRgb> *, QDitherInfo *)
889 {
890     const unsigned short *s = reinterpret_cast<const unsigned short *>(src);
891     for (int i = 0; i < count; ++i)
892         buffer[i] = QRgba64::fromRgba64(s[i], s[i], s[i], 65535);
893     return buffer;
894 }
895 
fetchGrayscale16ToRGBA64(QRgba64 * buffer,const uchar * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)896 static const QRgba64 *QT_FASTCALL fetchGrayscale16ToRGBA64(QRgba64 *buffer, const uchar *src, int index, int count,
897                                                          const QVector<QRgb> *, QDitherInfo *)
898 {
899     const unsigned short *s = reinterpret_cast<const unsigned short *>(src) + index;
900     for (int i = 0; i < count; ++i) {
901         buffer[i] = QRgba64::fromRgba64(s[i], s[i], s[i], 65535);
902     }
903     return buffer;
904 }
905 
storeARGB32FromARGB32PM(uchar * dest,const uint * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)906 static void QT_FASTCALL storeARGB32FromARGB32PM(uchar *dest, const uint *src, int index, int count,
907                                                 const QVector<QRgb> *, QDitherInfo *)
908 {
909     uint *d = reinterpret_cast<uint *>(dest) + index;
910     UNALIASED_CONVERSION_LOOP(d, src, count, [](uint c) { return qUnpremultiply(c); });
911 }
912 
storeRGBA8888PMFromARGB32PM(uchar * dest,const uint * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)913 static void QT_FASTCALL storeRGBA8888PMFromARGB32PM(uchar *dest, const uint *src, int index, int count,
914                                                     const QVector<QRgb> *, QDitherInfo *)
915 {
916     uint *d = reinterpret_cast<uint *>(dest) + index;
917     UNALIASED_CONVERSION_LOOP(d, src, count, ARGB2RGBA);
918 }
919 
920 #ifdef __SSE2__
// SSE2 widening of \a count 32-bit pixels from \a src into QRgba64 pixels in
// \a buffer.
//
// RGBA:      when true the source is passed through RGBA2ARGB() in the scalar
//            paths; when false the vector path additionally swaps the first
//            and third 16-bit channel of every widened pixel.
// maskAlpha: when true the top byte of every source pixel is forced to 0xff
//            before widening.
template<bool RGBA, bool maskAlpha>
static inline void qConvertARGB32PMToRGBA64PM_sse2(QRgba64 *buffer, const uint *src, int count)
{
    if (count <= 0)
        return;

    const __m128i amask = _mm_set1_epi32(0xff000000);
    int i = 0;
    // Scalar prologue: advance until the destination is 16-byte aligned so the
    // vector loop below can use aligned stores.
    for (; ((uintptr_t)buffer & 0xf) && i < count; ++i) {
        uint s = *src++;
        if (maskAlpha)
            s = s | 0xff000000;
        if (RGBA)
            s = RGBA2ARGB(s);
        *buffer++ = QRgba64::fromArgb32(s);
    }
    // Vector loop: four source pixels in, four 64-bit pixels out per iteration.
    for (; i < count-3; i += 4) {
        // The source is not known to be aligned, hence the unaligned load.
        __m128i vs = _mm_loadu_si128((const __m128i*)src);
        if (maskAlpha)
            vs = _mm_or_si128(vs, amask);
        src += 4;
        // Interleaving the register with itself duplicates every byte into
        // both halves of a 16-bit lane, widening each channel to 16 bits.
        __m128i v1 = _mm_unpacklo_epi8(vs, vs);
        __m128i v2 = _mm_unpackhi_epi8(vs, vs);
        if (!RGBA) {
            // Exchange 16-bit lanes 0 and 2 within each 64-bit pixel.
            v1 = _mm_shufflelo_epi16(v1, _MM_SHUFFLE(3, 0, 1, 2));
            v2 = _mm_shufflelo_epi16(v2, _MM_SHUFFLE(3, 0, 1, 2));
            v1 = _mm_shufflehi_epi16(v1, _MM_SHUFFLE(3, 0, 1, 2));
            v2 = _mm_shufflehi_epi16(v2, _MM_SHUFFLE(3, 0, 1, 2));
        }
        _mm_store_si128((__m128i*)(buffer), v1);
        buffer += 2;
        _mm_store_si128((__m128i*)(buffer), v2);
        buffer += 2;
    }

    // Scalar tail for whatever the vector loop left over.
    // NOTE(review): SIMD_EPILOGUE is defined elsewhere; presumably it iterates
    // i up to count with at most 3 iterations -- confirm against its definition.
    SIMD_EPILOGUE(i, count, 3) {
        uint s = *src++;
        if (maskAlpha)
            s = s | 0xff000000;
        if (RGBA)
            s = RGBA2ARGB(s);
        *buffer++ = QRgba64::fromArgb32(s);
    }
}
965 
// SSE2 narrowing of \a count QRgba64 pixels from \a buffer into 32-bit
// 2+10+10+10 pixels in \a dest. \a PixelOrder selects which of the two colour
// channels at the ends of the pixel ends up in the top 10-bit field.
//
// Pixels are handled in groups of 16. A group whose alpha values are all
// 0xffff is converted with plain shifts and masks, a group whose alpha values
// are all zero is written as zeros, and any other group falls back to the
// scalar qConvertRgb64ToRgb30() per pixel.
template<QtPixelOrder PixelOrder>
static inline void qConvertRGBA64PMToA2RGB30PM_sse2(uint *dest, const QRgba64 *buffer, int count)
{
    const __m128i gmask = _mm_set1_epi32(0x000ffc00);
    const __m128i cmask = _mm_set1_epi32(0x000003ff);
    int i = 0;
    __m128i vr, vg, vb, va;
    // Scalar prologue: advance until the source is 16-byte aligned so the
    // aligned loads below are valid.
    for (; i < count && uintptr_t(buffer) & 0xF; ++i) {
        *dest++ = qConvertRgb64ToRgb30<PixelOrder>(*buffer++);
    }

    for (; i < count-15; i += 16) {
        // Repremultiplying is really expensive and hard to do in SIMD without AVX2,
        // so we try to avoid it by checking if it is needed 16 samples at a time.
        __m128i vOr = _mm_set1_epi32(0);
        __m128i vAnd = _mm_set1_epi32(0xffffffff);
        // Accumulate the OR and the AND of all 16 pixels, two pixels per load.
        for (int j = 0; j < 16; j += 2) {
            __m128i vs = _mm_load_si128((const __m128i*)(buffer + j));
            vOr = _mm_or_si128(vOr, vs);
            vAnd = _mm_and_si128(vAnd, vs);
        }
        // 16-bit lanes 3 and 7 are the top 16 bits of the two 64-bit pixels in
        // a register, which is where the alpha value is taken from below.
        const quint16 orAlpha = ((uint)_mm_extract_epi16(vOr, 3)) | ((uint)_mm_extract_epi16(vOr, 7));
        const quint16 andAlpha = ((uint)_mm_extract_epi16(vAnd, 3)) & ((uint)_mm_extract_epi16(vAnd, 7));

        if (andAlpha == 0xffff) {
            // Every alpha is 0xffff: take the top 10 bits of each colour
            // channel and the top 2 bits of alpha directly.
            for (int j = 0; j < 16; j += 2) {
                __m128i vs = _mm_load_si128((const __m128i*)buffer);
                buffer += 2;
                vr = _mm_srli_epi64(vs, 6);
                vg = _mm_srli_epi64(vs, 16 + 6 - 10);
                vb = _mm_srli_epi64(vs, 32 + 6);
                vr = _mm_and_si128(vr, cmask);
                vg = _mm_and_si128(vg, gmask);
                vb = _mm_and_si128(vb, cmask);
                va = _mm_srli_epi64(vs, 48 + 14);
                // Move whichever channel the pixel order wants on top into
                // bits 20..29; the other one stays in bits 0..9.
                if (PixelOrder == PixelOrderRGB)
                    vr = _mm_slli_epi32(vr, 20);
                else
                    vb = _mm_slli_epi32(vb, 20);
                va = _mm_slli_epi32(va, 30);
                __m128i vd = _mm_or_si128(_mm_or_si128(vr, vg), _mm_or_si128(vb, va));
                // The results sit in 32-bit lanes 0 and 2; pack them into the
                // low 64 bits so a single 8-byte store writes both pixels.
                vd = _mm_shuffle_epi32(vd, _MM_SHUFFLE(3, 1, 2, 0));
                _mm_storel_epi64((__m128i*)dest, vd);
                dest += 2;
            }
        } else if (orAlpha == 0) {
            // Every alpha is zero: the whole group is written as zero pixels.
            for (int j = 0; j < 16; ++j) {
                *dest++ = 0;
                buffer++;
            }
        } else {
            // Mixed alpha values: use the scalar conversion for each pixel.
            for (int j = 0; j < 16; ++j)
                *dest++ = qConvertRgb64ToRgb30<PixelOrder>(*buffer++);
        }
    }

    // Scalar tail for whatever the grouped loop left over.
    // NOTE(review): SIMD_EPILOGUE is defined elsewhere; presumably it iterates
    // i up to count with at most 15 iterations -- confirm against its definition.
    SIMD_EPILOGUE(i, count, 15)
        *dest++ = qConvertRgb64ToRgb30<PixelOrder>(*buffer++);
}
1025 #elif defined(__ARM_NEON__)
// NEON widening of \a count 32-bit pixels from \a src into QRgba64 pixels in
// \a buffer.
//
// RGBA:      when true the source is passed through RGBA2ARGB() in the scalar
//            tail; when false the vector path reorders the bytes of every
//            pixel with the rgbaMask table (bytes 0 and 2 exchanged).
// maskAlpha: when true the top byte of every source pixel is forced to 0xff
//            before widening.
template<bool RGBA, bool maskAlpha>
static inline void qConvertARGB32PMToRGBA64PM_neon(QRgba64 *buffer, const uint *src, int count)
{
    if (count <= 0)
        return;

    const uint32x4_t amask = vdupq_n_u32(0xff000000);
    // Byte permutation that exchanges bytes 0 and 2 of each 4-byte pixel. The
    // 64-bit ARM variant covers a full 16-byte register in one table lookup.
#if defined(Q_PROCESSOR_ARM_64)
    const uint8x16_t rgbaMask  = { 2, 1, 0, 3, 6, 5, 4, 7, 10, 9, 8, 11, 14, 13, 12, 15};
#else
    const uint8x8_t rgbaMask  = { 2, 1, 0, 3, 6, 5, 4, 7 };
#endif
    int i = 0;
    // Vector loop: four source pixels in, four 64-bit pixels out per iteration.
    for (; i < count-3; i += 4) {
        uint32x4_t vs32 = vld1q_u32(src);
        src += 4;
        if (maskAlpha)
            vs32 = vorrq_u32(vs32, amask);
        uint8x16_t vs8 = vreinterpretq_u8_u32(vs32);
        if (!RGBA) {
#if defined(Q_PROCESSOR_ARM_64)
            vs8 = vqtbl1q_u8(vs8, rgbaMask);
#else
            // no vqtbl1q_u8
            const uint8x8_t vlo = vtbl1_u8(vget_low_u8(vs8), rgbaMask);
            const uint8x8_t vhi = vtbl1_u8(vget_high_u8(vs8), rgbaMask);
            vs8 = vcombine_u8(vlo, vhi);
#endif
        }
        // Zipping the register with itself duplicates every byte into both
        // halves of a 16-bit lane, widening each channel to 16 bits.
        uint8x16x2_t v = vzipq_u8(vs8, vs8);

        vst1q_u16((uint16_t *)buffer, vreinterpretq_u16_u8(v.val[0]));
        buffer += 2;
        vst1q_u16((uint16_t *)buffer, vreinterpretq_u16_u8(v.val[1]));
        buffer += 2;
    }

    // Scalar tail for whatever the vector loop left over.
    // NOTE(review): SIMD_EPILOGUE is defined elsewhere; presumably it iterates
    // i up to count with at most 3 iterations -- confirm against its definition.
    SIMD_EPILOGUE(i, count, 3) {
        uint s = *src++;
        if (maskAlpha)
            s = s | 0xff000000;
        if (RGBA)
            s = RGBA2ARGB(s);
        *buffer++ = QRgba64::fromArgb32(s);
    }
}
1072 #endif
1073 
convertRGB32ToRGB64(QRgba64 * buffer,const uint * src,int count,const QVector<QRgb> *,QDitherInfo *)1074 static const QRgba64 *QT_FASTCALL convertRGB32ToRGB64(QRgba64 *buffer, const uint *src, int count,
1075                                                       const QVector<QRgb> *, QDitherInfo *)
1076 {
1077 #ifdef __SSE2__
1078     qConvertARGB32PMToRGBA64PM_sse2<false, true>(buffer, src, count);
1079 #elif defined(__ARM_NEON__)
1080     qConvertARGB32PMToRGBA64PM_neon<false, true>(buffer, src, count);
1081 #else
1082     for (int i = 0; i < count; ++i)
1083         buffer[i] = QRgba64::fromArgb32(0xff000000 | src[i]);
1084 #endif
1085     return buffer;
1086 }
1087 
fetchRGB32ToRGB64(QRgba64 * buffer,const uchar * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)1088 static const QRgba64 *QT_FASTCALL fetchRGB32ToRGB64(QRgba64 *buffer, const uchar *src, int index, int count,
1089                                                     const QVector<QRgb> *, QDitherInfo *)
1090 {
1091     return convertRGB32ToRGB64(buffer, reinterpret_cast<const uint *>(src) + index, count, nullptr, nullptr);
1092 }
1093 
convertARGB32ToRGBA64PM(QRgba64 * buffer,const uint * src,int count,const QVector<QRgb> *,QDitherInfo *)1094 static const QRgba64 *QT_FASTCALL convertARGB32ToRGBA64PM(QRgba64 *buffer, const uint *src, int count,
1095                                                           const QVector<QRgb> *, QDitherInfo *)
1096 {
1097     for (int i = 0; i < count; ++i)
1098         buffer[i] = QRgba64::fromArgb32(src[i]).premultiplied();
1099     return buffer;
1100 }
1101 
fetchARGB32ToRGBA64PM(QRgba64 * buffer,const uchar * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)1102 static const QRgba64 *QT_FASTCALL fetchARGB32ToRGBA64PM(QRgba64 *buffer, const uchar *src, int index, int count,
1103                                                         const QVector<QRgb> *, QDitherInfo *)
1104 {
1105     return convertARGB32ToRGBA64PM(buffer, reinterpret_cast<const uint *>(src) + index, count, nullptr, nullptr);
1106 }
1107 
convertARGB32PMToRGBA64PM(QRgba64 * buffer,const uint * src,int count,const QVector<QRgb> *,QDitherInfo *)1108 static const QRgba64 *QT_FASTCALL convertARGB32PMToRGBA64PM(QRgba64 *buffer, const uint *src, int count,
1109                                                             const QVector<QRgb> *, QDitherInfo *)
1110 {
1111 #ifdef __SSE2__
1112     qConvertARGB32PMToRGBA64PM_sse2<false, false>(buffer, src, count);
1113 #elif defined(__ARM_NEON__)
1114     qConvertARGB32PMToRGBA64PM_neon<false, false>(buffer, src, count);
1115 #else
1116     for (int i = 0; i < count; ++i)
1117         buffer[i] = QRgba64::fromArgb32(src[i]);
1118 #endif
1119     return buffer;
1120 }
1121 
fetchARGB32PMToRGBA64PM(QRgba64 * buffer,const uchar * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)1122 static const QRgba64 *QT_FASTCALL fetchARGB32PMToRGBA64PM(QRgba64 *buffer, const uchar *src, int index, int count,
1123                                                           const QVector<QRgb> *, QDitherInfo *)
1124 {
1125     return convertARGB32PMToRGBA64PM(buffer, reinterpret_cast<const uint *>(src) + index, count, nullptr, nullptr);
1126 }
1127 
1128 #if QT_CONFIG(raster_64bit)
convertRGBA64ToRGBA64PM(QRgba64 * buffer,int count)1129 static void convertRGBA64ToRGBA64PM(QRgba64 *buffer, int count)
1130 {
1131     for (int i = 0; i < count; ++i)
1132         buffer[i] = buffer[i].premultiplied();
1133 }
1134 
// No-op counterpart of convertRGBA64ToRGBA64PM(): leaves the buffer untouched.
// Both arguments (buffer and count) are ignored.
static void convertRGBA64PMToRGBA64PM(QRgba64 *, int)
{
}
1138 #endif
1139 
fetchRGBA64ToRGBA64PM(QRgba64 * buffer,const uchar * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)1140 static const QRgba64 *QT_FASTCALL fetchRGBA64ToRGBA64PM(QRgba64 *buffer, const uchar *src, int index, int count,
1141                                                         const QVector<QRgb> *, QDitherInfo *)
1142 {
1143     const QRgba64 *s = reinterpret_cast<const QRgba64 *>(src) + index;
1144     for (int i = 0; i < count; ++i)
1145         buffer[i] = QRgba64::fromRgba64(s[i]).premultiplied();
1146     return buffer;
1147 }
1148 
convertRGBA8888ToRGBA64PM(QRgba64 * buffer,const uint * src,int count,const QVector<QRgb> *,QDitherInfo *)1149 static const QRgba64 *QT_FASTCALL convertRGBA8888ToRGBA64PM(QRgba64 *buffer, const uint *src, int count,
1150                                                             const QVector<QRgb> *, QDitherInfo *)
1151 {
1152     for (int i = 0; i < count; ++i)
1153         buffer[i] = QRgba64::fromArgb32(RGBA2ARGB(src[i])).premultiplied();
1154     return buffer;
1155 }
1156 
fetchRGBA8888ToRGBA64PM(QRgba64 * buffer,const uchar * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)1157 static const QRgba64 *QT_FASTCALL fetchRGBA8888ToRGBA64PM(QRgba64 *buffer, const uchar *src, int index, int count,
1158                                                           const QVector<QRgb> *, QDitherInfo *)
1159 {
1160     return convertRGBA8888ToRGBA64PM(buffer, reinterpret_cast<const uint *>(src) + index, count, nullptr, nullptr);
1161 }
1162 
convertRGBA8888PMToRGBA64PM(QRgba64 * buffer,const uint * src,int count,const QVector<QRgb> *,QDitherInfo *)1163 static const QRgba64 *QT_FASTCALL convertRGBA8888PMToRGBA64PM(QRgba64 *buffer, const uint *src, int count,
1164                                                               const QVector<QRgb> *, QDitherInfo *)
1165 {
1166 #ifdef __SSE2__
1167     qConvertARGB32PMToRGBA64PM_sse2<true, false>(buffer, src, count);
1168 #elif defined(__ARM_NEON__)
1169     qConvertARGB32PMToRGBA64PM_neon<true, false>(buffer, src, count);
1170 #else
1171     for (int i = 0; i < count; ++i)
1172         buffer[i] = QRgba64::fromArgb32(RGBA2ARGB(src[i]));
1173 #endif
1174     return buffer;
1175 }
1176 
fetchRGBA8888PMToRGBA64PM(QRgba64 * buffer,const uchar * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)1177 static const QRgba64 *QT_FASTCALL fetchRGBA8888PMToRGBA64PM(QRgba64 *buffer, const uchar *src, int index, int count,
1178                                                             const QVector<QRgb> *, QDitherInfo *)
1179 {
1180     return convertRGBA8888PMToRGBA64PM(buffer, reinterpret_cast<const uint *>(src) + index, count, nullptr, nullptr);
1181 }
1182 
storeRGBA8888FromARGB32PM(uchar * dest,const uint * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)1183 static void QT_FASTCALL storeRGBA8888FromARGB32PM(uchar *dest, const uint *src, int index, int count,
1184                                                   const QVector<QRgb> *, QDitherInfo *)
1185 {
1186     uint *d = reinterpret_cast<uint *>(dest) + index;
1187     UNALIASED_CONVERSION_LOOP(d, src, count, [](uint c) { return ARGB2RGBA(qUnpremultiply(c)); });
1188 }
1189 
storeRGBXFromRGB32(uchar * dest,const uint * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)1190 static void QT_FASTCALL storeRGBXFromRGB32(uchar *dest, const uint *src, int index, int count,
1191                                            const QVector<QRgb> *, QDitherInfo *)
1192 {
1193     uint *d = reinterpret_cast<uint *>(dest) + index;
1194     UNALIASED_CONVERSION_LOOP(d, src, count, [](uint c) { return ARGB2RGBA(0xff000000 | c); });
1195 }
1196 
storeRGBXFromARGB32PM(uchar * dest,const uint * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)1197 static void QT_FASTCALL storeRGBXFromARGB32PM(uchar *dest, const uint *src, int index, int count,
1198                                               const QVector<QRgb> *, QDitherInfo *)
1199 {
1200     uint *d = reinterpret_cast<uint *>(dest) + index;
1201     UNALIASED_CONVERSION_LOOP(d, src, count, [](uint c) { return ARGB2RGBA(0xff000000 | qUnpremultiply(c)); });
1202 }
1203 
1204 template<QtPixelOrder PixelOrder>
convertA2RGB30PMToARGB32PM(uint * buffer,int count,const QVector<QRgb> *)1205 static void QT_FASTCALL convertA2RGB30PMToARGB32PM(uint *buffer, int count, const QVector<QRgb> *)
1206 {
1207     for (int i = 0; i < count; ++i)
1208         buffer[i] = qConvertA2rgb30ToArgb32<PixelOrder>(buffer[i]);
1209 }
1210 
1211 template<QtPixelOrder PixelOrder>
fetchA2RGB30PMToARGB32PM(uint * buffer,const uchar * s,int index,int count,const QVector<QRgb> *,QDitherInfo * dither)1212 static const uint *QT_FASTCALL fetchA2RGB30PMToARGB32PM(uint *buffer, const uchar *s, int index, int count,
1213                                                         const QVector<QRgb> *, QDitherInfo *dither)
1214 {
1215     const uint *src = reinterpret_cast<const uint *>(s) + index;
1216     if (!dither) {
1217         UNALIASED_CONVERSION_LOOP(buffer, src, count, qConvertA2rgb30ToArgb32<PixelOrder>);
1218     } else {
1219         for (int i = 0; i < count; ++i) {
1220             const uint c = src[i];
1221             short d10 = (qt_bayer_matrix[dither->y & 15][(dither->x + i) & 15] << 2);
1222             short a10 = (c >> 30) * 0x155;
1223             short r10 = ((c >> 20) & 0x3ff);
1224             short g10 = ((c >> 10) & 0x3ff);
1225             short b10 = (c & 0x3ff);
1226             if (PixelOrder == PixelOrderBGR)
1227                 std::swap(r10, b10);
1228             short a8 = (a10 + ((d10 - a10) >> 8)) >> 2;
1229             short r8 = (r10 + ((d10 - r10) >> 8)) >> 2;
1230             short g8 = (g10 + ((d10 - g10) >> 8)) >> 2;
1231             short b8 = (b10 + ((d10 - b10) >> 8)) >> 2;
1232             buffer[i] = qRgba(r8, g8, b8, a8);
1233         }
1234     }
1235     return buffer;
1236 }
1237 
#ifdef __SSE2__
// SSE2 conversion of `count` packed 2:10:10:10 pixels from `src` into
// 16-bit-per-channel QRgba64 values in `buffer`.
//
// For PixelOrderRGB the field in bits 20-29 becomes the first 16-bit word of
// each output pixel and the field in bits 0-9 the third; for the other order
// the two are exchanged. Bits 10-19 always form the second word and the two
// top bits the fourth.
template<QtPixelOrder PixelOrder>
static inline void qConvertA2RGB30PMToRGBA64PM_sse2(QRgba64 *buffer, const uint *src, int count)
{
    if (count <= 0)
        return;

    // Masks selecting the three 10-bit fields of each 32-bit pixel.
    const __m128i rmask = _mm_set1_epi32(0x3ff00000);
    const __m128i gmask = _mm_set1_epi32(0x000ffc00);
    const __m128i bmask = _mm_set1_epi32(0x000003ff);
    // Multiplying a 2-bit value by 0x5555 spreads it over 16 bits (3 -> 0xffff).
    const __m128i afactor = _mm_set1_epi16(0x5555);
    int i = 0;

    // Convert single pixels until buffer is 16-byte aligned, which the aligned
    // stores in the vector loop below rely on.
    for (; ((uintptr_t)buffer & 0xf) && i < count; ++i)
        *buffer++ = qConvertA2rgb30ToRgb64<PixelOrder>(*src++);

    // Vector loop: four source pixels in, four QRgba64 values out per iteration.
    for (; i < count-3; i += 4) {
        __m128i vs = _mm_loadu_si128((const __m128i*)src);
        src += 4;
        __m128i va = _mm_srli_epi32(vs, 30);
        __m128i vr = _mm_and_si128(vs, rmask);
        __m128i vb = _mm_and_si128(vs, bmask);
        __m128i vg = _mm_and_si128(vs, gmask);
        va = _mm_mullo_epi16(va, afactor);
        // Widen each 10-bit field to 16 bits as (v << 6) | (v >> 4); the shift
        // amounts differ only because each field starts at a different bit.
        vr = _mm_or_si128(_mm_srli_epi32(vr, 14), _mm_srli_epi32(vr, 24));
        vg = _mm_or_si128(_mm_srli_epi32(vg, 4), _mm_srli_epi32(vg, 14));
        vb = _mm_or_si128(_mm_slli_epi32(vb, 6), _mm_srli_epi32(vb, 4));
        // Pair the channels up: a two-byte shift moves one value of each pair
        // into the upper 16 bits of its 32-bit lane.
        __m128i vrb;
        if (PixelOrder == PixelOrderRGB)
             vrb = _mm_or_si128(vr, _mm_slli_si128(vb, 2));
        else
             vrb = _mm_or_si128(vb, _mm_slli_si128(vr, 2));
        __m128i vga = _mm_or_si128(vg, _mm_slli_si128(va, 2));
        // Interleaving the two pair registers yields four 16-bit words per
        // pixel; each store writes two complete pixels.
        _mm_store_si128((__m128i*)(buffer), _mm_unpacklo_epi16(vrb, vga));
        buffer += 2;
        _mm_store_si128((__m128i*)(buffer), _mm_unpackhi_epi16(vrb, vga));
        buffer += 2;
    }

    // Pixels left over after the vector loop are converted one at a time; the
    // loop header comes from the SIMD_EPILOGUE macro.
    SIMD_EPILOGUE(i, count, 3)
        *buffer++ = qConvertA2rgb30ToRgb64<PixelOrder>(*src++);
}
#endif
1281 
1282 template<QtPixelOrder PixelOrder>
convertA2RGB30PMToRGBA64PM(QRgba64 * buffer,const uint * src,int count,const QVector<QRgb> *,QDitherInfo *)1283 static const QRgba64 *QT_FASTCALL convertA2RGB30PMToRGBA64PM(QRgba64 *buffer, const uint *src, int count,
1284                                                              const QVector<QRgb> *, QDitherInfo *)
1285 {
1286 #ifdef __SSE2__
1287     qConvertA2RGB30PMToRGBA64PM_sse2<PixelOrder>(buffer, src, count);
1288 #else
1289     for (int i = 0; i < count; ++i)
1290         buffer[i] = qConvertA2rgb30ToRgb64<PixelOrder>(src[i]);
1291 #endif
1292     return buffer;
1293 }
1294 
1295 template<QtPixelOrder PixelOrder>
fetchA2RGB30PMToRGBA64PM(QRgba64 * buffer,const uchar * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)1296 static const QRgba64 *QT_FASTCALL fetchA2RGB30PMToRGBA64PM(QRgba64 *buffer, const uchar *src, int index, int count,
1297                                                            const QVector<QRgb> *, QDitherInfo *)
1298 {
1299     return convertA2RGB30PMToRGBA64PM<PixelOrder>(buffer, reinterpret_cast<const uint *>(src) + index, count, nullptr, nullptr);
1300 }
1301 
1302 template<QtPixelOrder PixelOrder>
storeA2RGB30PMFromARGB32PM(uchar * dest,const uint * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)1303 static void QT_FASTCALL storeA2RGB30PMFromARGB32PM(uchar *dest, const uint *src, int index, int count,
1304                                                    const QVector<QRgb> *, QDitherInfo *)
1305 {
1306     uint *d = reinterpret_cast<uint *>(dest) + index;
1307     UNALIASED_CONVERSION_LOOP(d, src, count, qConvertArgb32ToA2rgb30<PixelOrder>);
1308 }
1309 
1310 template<QtPixelOrder PixelOrder>
storeRGB30FromRGB32(uchar * dest,const uint * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)1311 static void QT_FASTCALL storeRGB30FromRGB32(uchar *dest, const uint *src, int index, int count,
1312                                             const QVector<QRgb> *, QDitherInfo *)
1313 {
1314     uint *d = reinterpret_cast<uint *>(dest) + index;
1315     UNALIASED_CONVERSION_LOOP(d, src, count, qConvertRgb32ToRgb30<PixelOrder>);
1316 }
1317 
1318 template<QtPixelOrder PixelOrder>
storeRGB30FromARGB32PM(uchar * dest,const uint * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)1319 static void QT_FASTCALL storeRGB30FromARGB32PM(uchar *dest, const uint *src, int index, int count,
1320                                                const QVector<QRgb> *, QDitherInfo *)
1321 {
1322     uint *d = reinterpret_cast<uint *>(dest) + index;
1323     UNALIASED_CONVERSION_LOOP(d, src, count, qConvertRgb32ToRgb30<PixelOrder>);
1324 }
1325 
// Converts `count` QRgba64 pixels from `src` to 32-bit pixels in `dst`.
// The scalar path uses QRgba64::toArgb32() and, when RGBA is true, additionally
// applies ARGB2RGBA. With SSE2 most pixels are converted two at a time.
// Both values of RGBA are explicitly instantiated below.
template<bool RGBA>
void qt_convertRGBA64ToARGB32(uint *dst, const QRgba64 *src, int count)
{
    int i = 0;
#ifdef __SSE2__
    // Convert one pixel on its own when dst is not 8-byte aligned, so that the
    // 8-byte stores in the vector loop start on an aligned address.
    if (((uintptr_t)dst & 0x7) && count > 0) {
        uint s = (*src++).toArgb32();
        if (RGBA)
            s = ARGB2RGBA(s);
        *dst++ = s;
        i++;
    }
    const __m128i vhalf = _mm_set1_epi32(0x80);
    const __m128i vzero = _mm_setzero_si128();
    // Vector loop: two 64-bit pixels in, two 32-bit pixels out per iteration.
    for (; i < count-1; i += 2) {
        __m128i vs = _mm_loadu_si128((const __m128i*)src);
        src += 2;
        if (!RGBA) {
            // Exchange the first and third 16-bit word of each pixel.
            vs = _mm_shufflelo_epi16(vs, _MM_SHUFFLE(3, 0, 1, 2));
            vs = _mm_shufflehi_epi16(vs, _MM_SHUFFLE(3, 0, 1, 2));
        }
        // Widen every 16-bit channel to a 32-bit lane.
        __m128i v1 = _mm_unpacklo_epi16(vs, vzero);
        __m128i v2 = _mm_unpackhi_epi16(vs, vzero);
        // Reduce each channel to 8 bits with rounding:
        // t = v + 0x80; result = (t - (t >> 8)) >> 8.
        v1 = _mm_add_epi32(v1, vhalf);
        v2 = _mm_add_epi32(v2, vhalf);
        v1 = _mm_sub_epi32(v1, _mm_srli_epi32(v1, 8));
        v2 = _mm_sub_epi32(v2, _mm_srli_epi32(v2, 8));
        v1 = _mm_srli_epi32(v1, 8);
        v2 = _mm_srli_epi32(v2, 8);
        // Narrow 32 -> 16 -> 8 bits; the two pixels end up in the low 8 bytes.
        v1 = _mm_packs_epi32(v1, v2);
        v1 = _mm_packus_epi16(v1, vzero);
        _mm_storel_epi64((__m128i*)(dst), v1);
        dst += 2;
    }
#endif
    // Scalar path: everything without SSE2, otherwise only the leftover pixel.
    for (; i < count; i++) {
        uint s = (*src++).toArgb32();
        if (RGBA)
            s = ARGB2RGBA(s);
        *dst++ = s;
    }
}
template void qt_convertRGBA64ToARGB32<false>(uint *dst, const QRgba64 *src, int count);
template void qt_convertRGBA64ToARGB32<true>(uint *dst, const QRgba64 *src, int count);
1370 
1371 
storeAlpha8FromARGB32PM(uchar * dest,const uint * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)1372 static void QT_FASTCALL storeAlpha8FromARGB32PM(uchar *dest, const uint *src, int index, int count,
1373                                                 const QVector<QRgb> *, QDitherInfo *)
1374 {
1375     for (int i = 0; i < count; ++i)
1376         dest[index + i] = qAlpha(src[i]);
1377 }
1378 
storeGrayscale8FromRGB32(uchar * dest,const uint * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)1379 static void QT_FASTCALL storeGrayscale8FromRGB32(uchar *dest, const uint *src, int index, int count,
1380                                                  const QVector<QRgb> *, QDitherInfo *)
1381 {
1382     for (int i = 0; i < count; ++i)
1383         dest[index + i] = qGray(src[i]);
1384 }
1385 
storeGrayscale8FromARGB32PM(uchar * dest,const uint * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)1386 static void QT_FASTCALL storeGrayscale8FromARGB32PM(uchar *dest, const uint *src, int index, int count,
1387                                                     const QVector<QRgb> *, QDitherInfo *)
1388 {
1389     for (int i = 0; i < count; ++i)
1390         dest[index + i] = qGray(qUnpremultiply(src[i]));
1391 }
1392 
storeGrayscale16FromRGB32(uchar * dest,const uint * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)1393 static void QT_FASTCALL storeGrayscale16FromRGB32(uchar *dest, const uint *src, int index, int count,
1394                                                  const QVector<QRgb> *, QDitherInfo *)
1395 {
1396     unsigned short *d = reinterpret_cast<unsigned short *>(dest) + index;
1397     for (int i = 0; i < count; ++i)
1398         d[i] = qGray(src[i]) * 257;
1399 }
1400 
storeGrayscale16FromARGB32PM(uchar * dest,const uint * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)1401 static void QT_FASTCALL storeGrayscale16FromARGB32PM(uchar *dest, const uint *src, int index, int count,
1402                                                     const QVector<QRgb> *, QDitherInfo *)
1403 {
1404     unsigned short *d = reinterpret_cast<unsigned short *>(dest) + index;
1405     for (int i = 0; i < count; ++i)
1406         d[i] = qGray(qUnpremultiply(src[i])) * 257;
1407 }
1408 
fetchRGB64ToRGB32(uint * buffer,const uchar * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)1409 static const uint *QT_FASTCALL fetchRGB64ToRGB32(uint *buffer, const uchar *src, int index, int count,
1410                                                  const QVector<QRgb> *, QDitherInfo *)
1411 {
1412     const QRgba64 *s = reinterpret_cast<const QRgba64 *>(src) + index;
1413     for (int i = 0; i < count; ++i)
1414         buffer[i] = toArgb32(s[i]);
1415     return buffer;
1416 }
1417 
storeRGB64FromRGB32(uchar * dest,const uint * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)1418 static void QT_FASTCALL storeRGB64FromRGB32(uchar *dest, const uint *src, int index, int count,
1419                                             const QVector<QRgb> *, QDitherInfo *)
1420 {
1421     QRgba64 *d = reinterpret_cast<QRgba64 *>(dest) + index;
1422     for (int i = 0; i < count; ++i)
1423         d[i] = QRgba64::fromArgb32(src[i]);
1424 }
1425 
fetchRGBA64ToARGB32PM(uint * buffer,const uchar * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)1426 static const uint *QT_FASTCALL fetchRGBA64ToARGB32PM(uint *buffer, const uchar *src, int index, int count,
1427                                                      const QVector<QRgb> *, QDitherInfo *)
1428 {
1429     const QRgba64 *s = reinterpret_cast<const QRgba64 *>(src) + index;
1430     for (int i = 0; i < count; ++i)
1431         buffer[i] = toArgb32(s[i].premultiplied());
1432     return buffer;
1433 }
1434 
storeRGBA64FromARGB32PM(uchar * dest,const uint * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)1435 static void QT_FASTCALL storeRGBA64FromARGB32PM(uchar *dest, const uint *src, int index, int count,
1436                                                 const QVector<QRgb> *, QDitherInfo *)
1437 {
1438     QRgba64 *d = reinterpret_cast<QRgba64 *>(dest) + index;
1439     for (int i = 0; i < count; ++i)
1440         d[i] = QRgba64::fromArgb32(src[i]).unpremultiplied();
1441 }
1442 
// Per-format pixel layout table, indexed by QImage::Format (one entry per
// format, in enum order; the static assert below checks the entry count).
//
// Judging by the function names used in the entries, each initializer lists,
// after two boolean flags and the bits-per-pixel tag: the red/blue swap
// function, the in-place conversions to ARGB32PM and RGBA64PM, the fetch
// functions to ARGB32PM and RGBA64PM, and the store functions from ARGB32PM
// and from RGB32. TODO confirm the flag meanings and the exact field order
// against the QPixelLayout declaration.
//
// Note:
// convertToArgb32() assumes that no color channel is less than 4 bits.
// storeRGBFromARGB32PM() assumes that no color channel is more than 8 bits.
// QImage::rgbSwapped() assumes that the red and blue color channels have the same number of bits.
QPixelLayout qPixelLayouts[QImage::NImageFormats] = {
    { false, false, QPixelLayout::BPPNone, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr }, // Format_Invalid
    { false, false, QPixelLayout::BPP1MSB, nullptr,
      convertIndexedToARGB32PM, convertIndexedToRGBA64PM,
      fetchIndexedToARGB32PM<QPixelLayout::BPP1MSB>, fetchIndexedToRGBA64PM<QPixelLayout::BPP1MSB>,
      nullptr, nullptr }, // Format_Mono
    { false, false, QPixelLayout::BPP1LSB, nullptr,
      convertIndexedToARGB32PM, convertIndexedToRGBA64PM,
      fetchIndexedToARGB32PM<QPixelLayout::BPP1LSB>, fetchIndexedToRGBA64PM<QPixelLayout::BPP1LSB>,
      nullptr, nullptr }, // Format_MonoLSB
    { false, false, QPixelLayout::BPP8, nullptr,
      convertIndexedToARGB32PM, convertIndexedToRGBA64PM,
      fetchIndexedToARGB32PM<QPixelLayout::BPP8>, fetchIndexedToRGBA64PM<QPixelLayout::BPP8>,
      nullptr, nullptr }, // Format_Indexed8
    // Technically using convertPassThrough to convert from ARGB32PM to RGB32 is wrong,
    // but everywhere this generic conversion would be wrong is currently overloaded.
    { false, false, QPixelLayout::BPP32, rbSwap_rgb32, convertPassThrough,
      convertRGB32ToRGB64, fetchPassThrough, fetchRGB32ToRGB64, storePassThrough, storePassThrough }, // Format_RGB32
    { true, false, QPixelLayout::BPP32, rbSwap_rgb32, convertARGB32ToARGB32PM,
      convertARGB32ToRGBA64PM, fetchARGB32ToARGB32PM, fetchARGB32ToRGBA64PM, storeARGB32FromARGB32PM, storePassThrough }, // Format_ARGB32
    { true, true, QPixelLayout::BPP32, rbSwap_rgb32, convertPassThrough,
      convertARGB32PMToRGBA64PM, fetchPassThrough, fetchARGB32PMToRGBA64PM, storePassThrough, storePassThrough }, // Format_ARGB32_Premultiplied
    pixelLayoutRGB<QImage::Format_RGB16>(),
    pixelLayoutARGBPM<QImage::Format_ARGB8565_Premultiplied>(),
    pixelLayoutRGB<QImage::Format_RGB666>(),
    pixelLayoutARGBPM<QImage::Format_ARGB6666_Premultiplied>(),
    pixelLayoutRGB<QImage::Format_RGB555>(),
    pixelLayoutARGBPM<QImage::Format_ARGB8555_Premultiplied>(),
    pixelLayoutRGB<QImage::Format_RGB888>(),
    pixelLayoutRGB<QImage::Format_RGB444>(),
    pixelLayoutARGBPM<QImage::Format_ARGB4444_Premultiplied>(),
    { false, false, QPixelLayout::BPP32, rbSwap<QImage::Format_RGBA8888>, convertRGBA8888PMToARGB32PM,
      convertRGBA8888PMToRGBA64PM, fetchRGBA8888PMToARGB32PM, fetchRGBA8888PMToRGBA64PM, storeRGBXFromARGB32PM, storeRGBXFromRGB32 }, // Format_RGBX8888
    { true, false, QPixelLayout::BPP32, rbSwap<QImage::Format_RGBA8888>, convertRGBA8888ToARGB32PM,
      convertRGBA8888ToRGBA64PM, fetchRGBA8888ToARGB32PM, fetchRGBA8888ToRGBA64PM, storeRGBA8888FromARGB32PM, storeRGBXFromRGB32 }, // Format_RGBA8888
    { true, true, QPixelLayout::BPP32, rbSwap<QImage::Format_RGBA8888>, convertRGBA8888PMToARGB32PM,
      convertRGBA8888PMToRGBA64PM, fetchRGBA8888PMToARGB32PM, fetchRGBA8888PMToRGBA64PM, storeRGBA8888PMFromARGB32PM, storeRGBXFromRGB32 },  // Format_RGBA8888_Premultiplied
    { false, false, QPixelLayout::BPP32, rbSwap_rgb30,
      convertA2RGB30PMToARGB32PM<PixelOrderBGR>,
      convertA2RGB30PMToRGBA64PM<PixelOrderBGR>,
      fetchA2RGB30PMToARGB32PM<PixelOrderBGR>,
      fetchA2RGB30PMToRGBA64PM<PixelOrderBGR>,
      storeRGB30FromARGB32PM<PixelOrderBGR>,
      storeRGB30FromRGB32<PixelOrderBGR>
    }, // Format_BGR30
    { true, true, QPixelLayout::BPP32, rbSwap_rgb30,
      convertA2RGB30PMToARGB32PM<PixelOrderBGR>,
      convertA2RGB30PMToRGBA64PM<PixelOrderBGR>,
      fetchA2RGB30PMToARGB32PM<PixelOrderBGR>,
      fetchA2RGB30PMToRGBA64PM<PixelOrderBGR>,
      storeA2RGB30PMFromARGB32PM<PixelOrderBGR>,
      storeRGB30FromRGB32<PixelOrderBGR>
    },  // Format_A2BGR30_Premultiplied
    { false, false, QPixelLayout::BPP32, rbSwap_rgb30,
      convertA2RGB30PMToARGB32PM<PixelOrderRGB>,
      convertA2RGB30PMToRGBA64PM<PixelOrderRGB>,
      fetchA2RGB30PMToARGB32PM<PixelOrderRGB>,
      fetchA2RGB30PMToRGBA64PM<PixelOrderRGB>,
      storeRGB30FromARGB32PM<PixelOrderRGB>,
      storeRGB30FromRGB32<PixelOrderRGB>
    }, // Format_RGB30
    { true, true, QPixelLayout::BPP32, rbSwap_rgb30,
      convertA2RGB30PMToARGB32PM<PixelOrderRGB>,
      convertA2RGB30PMToRGBA64PM<PixelOrderRGB>,
      fetchA2RGB30PMToARGB32PM<PixelOrderRGB>,
      fetchA2RGB30PMToRGBA64PM<PixelOrderRGB>,
      storeA2RGB30PMFromARGB32PM<PixelOrderRGB>,
      storeRGB30FromRGB32<PixelOrderRGB>
    },  // Format_A2RGB30_Premultiplied
    { true, true, QPixelLayout::BPP8, nullptr,
      convertAlpha8ToRGB32, convertAlpha8ToRGB64,
      fetchAlpha8ToRGB32, fetchAlpha8ToRGB64,
      storeAlpha8FromARGB32PM, nullptr }, // Format_Alpha8
    { false, false, QPixelLayout::BPP8, nullptr,
      convertGrayscale8ToRGB32, convertGrayscale8ToRGB64,
      fetchGrayscale8ToRGB32, fetchGrayscale8ToRGB64,
      storeGrayscale8FromARGB32PM, storeGrayscale8FromRGB32 }, // Format_Grayscale8
    { false, false, QPixelLayout::BPP64, nullptr,
      convertPassThrough, nullptr,
      fetchRGB64ToRGB32, fetchPassThrough64,
      storeRGB64FromRGB32, storeRGB64FromRGB32 }, // Format_RGBX64
    { true, false, QPixelLayout::BPP64, nullptr,
      convertARGB32ToARGB32PM, nullptr,
      fetchRGBA64ToARGB32PM, fetchRGBA64ToRGBA64PM,
      storeRGBA64FromARGB32PM, storeRGB64FromRGB32 }, // Format_RGBA64
    { true, true, QPixelLayout::BPP64, nullptr,
      convertPassThrough, nullptr,
      fetchRGB64ToRGB32, fetchPassThrough64,
      storeRGB64FromRGB32, storeRGB64FromRGB32 }, // Format_RGBA64_Premultiplied
    { false, false, QPixelLayout::BPP16, nullptr,
      convertGrayscale16ToRGB32, convertGrayscale16ToRGBA64,
      fetchGrayscale16ToRGB32, fetchGrayscale16ToRGBA64,
      storeGrayscale16FromARGB32PM, storeGrayscale16FromRGB32 }, // Format_Grayscale16
    pixelLayoutRGB<QImage::Format_BGR888>(),
};

// Catches a table that has fallen out of step with the number of image formats.
Q_STATIC_ASSERT(sizeof(qPixelLayouts) / sizeof(*qPixelLayouts) == QImage::NImageFormats);
1544 
convertFromRgb64(uint * dest,const QRgba64 * src,int length)1545 static void QT_FASTCALL convertFromRgb64(uint *dest, const QRgba64 *src, int length)
1546 {
1547     for (int i = 0; i < length; ++i) {
1548         dest[i] = toArgb32(src[i]);
1549     }
1550 }
1551 
1552 template<QImage::Format format>
storeGenericFromRGBA64PM(uchar * dest,const QRgba64 * src,int index,int count,const QVector<QRgb> * clut,QDitherInfo * dither)1553 static void QT_FASTCALL storeGenericFromRGBA64PM(uchar *dest, const QRgba64 *src, int index, int count,
1554                                                  const QVector<QRgb> *clut, QDitherInfo *dither)
1555 {
1556     uint buffer[BufferSize];
1557     convertFromRgb64(buffer, src, count);
1558     qPixelLayouts[format].storeFromARGB32PM(dest, buffer, index, count, clut, dither);
1559 }
1560 
storeARGB32FromRGBA64PM(uchar * dest,const QRgba64 * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)1561 static void QT_FASTCALL storeARGB32FromRGBA64PM(uchar *dest, const QRgba64 *src, int index, int count,
1562                                                 const QVector<QRgb> *, QDitherInfo *)
1563 {
1564     uint *d = (uint*)dest + index;
1565     for (int i = 0; i < count; ++i)
1566         d[i] = toArgb32(src[i].unpremultiplied());
1567 }
1568 
storeRGBA8888FromRGBA64PM(uchar * dest,const QRgba64 * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)1569 static void QT_FASTCALL storeRGBA8888FromRGBA64PM(uchar *dest, const QRgba64 *src, int index, int count,
1570                                                   const QVector<QRgb> *, QDitherInfo *)
1571 {
1572     uint *d = (uint*)dest + index;
1573     for (int i = 0; i < count; ++i)
1574         d[i] = toRgba8888(src[i].unpremultiplied());
1575 }
1576 
1577 template<QtPixelOrder PixelOrder>
storeRGB30FromRGBA64PM(uchar * dest,const QRgba64 * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)1578 static void QT_FASTCALL storeRGB30FromRGBA64PM(uchar *dest, const QRgba64 *src, int index, int count,
1579                                                const QVector<QRgb> *, QDitherInfo *)
1580 {
1581     uint *d = (uint*)dest + index;
1582 #ifdef __SSE2__
1583     qConvertRGBA64PMToA2RGB30PM_sse2<PixelOrder>(d, src, count);
1584 #else
1585     for (int i = 0; i < count; ++i)
1586         d[i] = qConvertRgb64ToRgb30<PixelOrder>(src[i]);
1587 #endif
1588 }
1589 
storeRGBX64FromRGBA64PM(uchar * dest,const QRgba64 * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)1590 static void QT_FASTCALL storeRGBX64FromRGBA64PM(uchar *dest, const QRgba64 *src, int index, int count,
1591                                                 const QVector<QRgb> *, QDitherInfo *)
1592 {
1593     QRgba64 *d = reinterpret_cast<QRgba64*>(dest) + index;
1594     for (int i = 0; i < count; ++i) {
1595         d[i] = src[i].unpremultiplied();
1596         d[i].setAlpha(65535);
1597     }
1598 }
1599 
storeRGBA64FromRGBA64PM(uchar * dest,const QRgba64 * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)1600 static void QT_FASTCALL storeRGBA64FromRGBA64PM(uchar *dest, const QRgba64 *src, int index, int count,
1601                                                 const QVector<QRgb> *, QDitherInfo *)
1602 {
1603     QRgba64 *d = reinterpret_cast<QRgba64*>(dest) + index;
1604     for (int i = 0; i < count; ++i)
1605         d[i] = src[i].unpremultiplied();
1606 }
1607 
storeRGBA64PMFromRGBA64PM(uchar * dest,const QRgba64 * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)1608 static void QT_FASTCALL storeRGBA64PMFromRGBA64PM(uchar *dest, const QRgba64 *src, int index, int count,
1609                                                   const QVector<QRgb> *, QDitherInfo *)
1610 {
1611     QRgba64 *d = reinterpret_cast<QRgba64*>(dest) + index;
1612     if (d != src)
1613         memcpy(d, src, count * sizeof(QRgba64));
1614 }
1615 
storeGray16FromRGBA64PM(uchar * dest,const QRgba64 * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)1616 static void QT_FASTCALL storeGray16FromRGBA64PM(uchar *dest, const QRgba64 *src, int index, int count,
1617                                                 const QVector<QRgb> *, QDitherInfo *)
1618 {
1619     quint16 *d = reinterpret_cast<quint16*>(dest) + index;
1620     for (int i = 0; i < count; ++i) {
1621         QRgba64 s =  src[i].unpremultiplied();
1622         d[i] = qGray(s.red(), s.green(), s.blue());
1623     }
1624 }
1625 
// Store functions taking RGBA64PM input, indexed by QImage::Format (one entry
// per format, in enum order). Formats without a store function hold nullptr.
// Entries using storeGenericFromRGBA64PM go through a 32-bit intermediate.
ConvertAndStorePixelsFunc64 qStoreFromRGBA64PM[QImage::NImageFormats] = {
    nullptr, // Format_Invalid
    nullptr, // Format_Mono
    nullptr, // Format_MonoLSB
    nullptr, // Format_Indexed8
    storeGenericFromRGBA64PM<QImage::Format_RGB32>,
    storeARGB32FromRGBA64PM, // Format_ARGB32
    storeGenericFromRGBA64PM<QImage::Format_ARGB32_Premultiplied>,
    storeGenericFromRGBA64PM<QImage::Format_RGB16>,
    storeGenericFromRGBA64PM<QImage::Format_ARGB8565_Premultiplied>,
    storeGenericFromRGBA64PM<QImage::Format_RGB666>,
    storeGenericFromRGBA64PM<QImage::Format_ARGB6666_Premultiplied>,
    storeGenericFromRGBA64PM<QImage::Format_RGB555>,
    storeGenericFromRGBA64PM<QImage::Format_ARGB8555_Premultiplied>,
    storeGenericFromRGBA64PM<QImage::Format_RGB888>,
    storeGenericFromRGBA64PM<QImage::Format_RGB444>,
    storeGenericFromRGBA64PM<QImage::Format_ARGB4444_Premultiplied>,
    storeGenericFromRGBA64PM<QImage::Format_RGBX8888>,
    storeRGBA8888FromRGBA64PM, // Format_RGBA8888
    storeGenericFromRGBA64PM<QImage::Format_RGBA8888_Premultiplied>,
    storeRGB30FromRGBA64PM<PixelOrderBGR>, // Format_BGR30
    storeRGB30FromRGBA64PM<PixelOrderBGR>, // Format_A2BGR30_Premultiplied
    storeRGB30FromRGBA64PM<PixelOrderRGB>, // Format_RGB30
    storeRGB30FromRGBA64PM<PixelOrderRGB>, // Format_A2RGB30_Premultiplied
    storeGenericFromRGBA64PM<QImage::Format_Alpha8>,
    storeGenericFromRGBA64PM<QImage::Format_Grayscale8>,
    storeRGBX64FromRGBA64PM, // Format_RGBX64
    storeRGBA64FromRGBA64PM, // Format_RGBA64
    storeRGBA64PMFromRGBA64PM, // Format_RGBA64_Premultiplied
    storeGray16FromRGBA64PM, // Format_Grayscale16
    storeGenericFromRGBA64PM<QImage::Format_BGR888>,
};
1658 
/*
  Destination fetch. This is simple because we don't have to do bounds checks
  or transformations.
*/
1663 
destFetchMono(uint * buffer,QRasterBuffer * rasterBuffer,int x,int y,int length)1664 static uint * QT_FASTCALL destFetchMono(uint *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length)
1665 {
1666     uchar *Q_DECL_RESTRICT data = (uchar *)rasterBuffer->scanLine(y);
1667     uint *start = buffer;
1668     const uint *end = buffer + length;
1669     while (buffer < end) {
1670         *buffer = data[x>>3] & (0x80 >> (x & 7)) ? rasterBuffer->destColor1 : rasterBuffer->destColor0;
1671         ++buffer;
1672         ++x;
1673     }
1674     return start;
1675 }
1676 
destFetchMonoLsb(uint * buffer,QRasterBuffer * rasterBuffer,int x,int y,int length)1677 static uint * QT_FASTCALL destFetchMonoLsb(uint *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length)
1678 {
1679     uchar *Q_DECL_RESTRICT data = (uchar *)rasterBuffer->scanLine(y);
1680     uint *start = buffer;
1681     const uint *end = buffer + length;
1682     while (buffer < end) {
1683         *buffer = data[x>>3] & (0x1 << (x & 7)) ? rasterBuffer->destColor1 : rasterBuffer->destColor0;
1684         ++buffer;
1685         ++x;
1686     }
1687     return start;
1688 }
1689 
destFetchARGB32P(uint *,QRasterBuffer * rasterBuffer,int x,int y,int)1690 static uint * QT_FASTCALL destFetchARGB32P(uint *, QRasterBuffer *rasterBuffer, int x, int y, int)
1691 {
1692     return (uint *)rasterBuffer->scanLine(y) + x;
1693 }
1694 
destFetchRGB16(uint * buffer,QRasterBuffer * rasterBuffer,int x,int y,int length)1695 static uint * QT_FASTCALL destFetchRGB16(uint *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length)
1696 {
1697     const ushort *Q_DECL_RESTRICT data = (const ushort *)rasterBuffer->scanLine(y) + x;
1698     for (int i = 0; i < length; ++i)
1699         buffer[i] = qConvertRgb16To32(data[i]);
1700     return buffer;
1701 }
1702 
destFetch(uint * buffer,QRasterBuffer * rasterBuffer,int x,int y,int length)1703 static uint *QT_FASTCALL destFetch(uint *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length)
1704 {
1705     const QPixelLayout *layout = &qPixelLayouts[rasterBuffer->format];
1706     return const_cast<uint *>(layout->fetchToARGB32PM(buffer, rasterBuffer->scanLine(y), x, length, nullptr, nullptr));
1707 }
1708 
destFetchUndefined(uint * buffer,QRasterBuffer *,int,int,int)1709 static uint *QT_FASTCALL destFetchUndefined(uint *buffer, QRasterBuffer *, int, int, int)
1710 {
1711     return buffer;
1712 }
1713 
// Destination fetch functions producing 32-bit pixels, indexed by
// QImage::Format. Formats without a fetch function hold nullptr; most formats
// go through the generic, layout-driven destFetch.
static DestFetchProc destFetchProc[QImage::NImageFormats] =
{
    nullptr,            // Format_Invalid
    destFetchMono,      // Format_Mono,
    destFetchMonoLsb,   // Format_MonoLSB
    nullptr,            // Format_Indexed8
    destFetchARGB32P,   // Format_RGB32
    destFetch,          // Format_ARGB32,
    destFetchARGB32P,   // Format_ARGB32_Premultiplied
    destFetchRGB16,     // Format_RGB16
    destFetch,          // Format_ARGB8565_Premultiplied
    destFetch,          // Format_RGB666
    destFetch,          // Format_ARGB6666_Premultiplied
    destFetch,          // Format_RGB555
    destFetch,          // Format_ARGB8555_Premultiplied
    destFetch,          // Format_RGB888
    destFetch,          // Format_RGB444
    destFetch,          // Format_ARGB4444_Premultiplied
    destFetch,          // Format_RGBX8888
    destFetch,          // Format_RGBA8888
    destFetch,          // Format_RGBA8888_Premultiplied
    destFetch,          // Format_BGR30
    destFetch,          // Format_A2BGR30_Premultiplied
    destFetch,          // Format_RGB30
    destFetch,          // Format_A2RGB30_Premultiplied
    destFetch,          // Format_Alpha8
    destFetch,          // Format_Grayscale8
    destFetch,          // Format_RGBX64
    destFetch,          // Format_RGBA64
    destFetch,          // Format_RGBA64_Premultiplied
    destFetch,          // Format_Grayscale16
    destFetch,          // Format_BGR888
};
1747 
1748 #if QT_CONFIG(raster_64bit)
destFetch64(QRgba64 * buffer,QRasterBuffer * rasterBuffer,int x,int y,int length)1749 static QRgba64 *QT_FASTCALL destFetch64(QRgba64 *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length)
1750 {
1751     const QPixelLayout *layout = &qPixelLayouts[rasterBuffer->format];
1752     return const_cast<QRgba64 *>(layout->fetchToRGBA64PM(buffer, rasterBuffer->scanLine(y), x, length, nullptr, nullptr));
1753 }
1754 
destFetchRGB64(QRgba64 *,QRasterBuffer * rasterBuffer,int x,int y,int)1755 static QRgba64 * QT_FASTCALL destFetchRGB64(QRgba64 *, QRasterBuffer *rasterBuffer, int x, int y, int)
1756 {
1757     return (QRgba64 *)rasterBuffer->scanLine(y) + x;
1758 }
1759 
destFetch64Undefined(QRgba64 * buffer,QRasterBuffer *,int,int,int)1760 static QRgba64 * QT_FASTCALL destFetch64Undefined(QRgba64 *buffer, QRasterBuffer *, int, int, int)
1761 {
1762     return buffer;
1763 }
1764 
// Destination fetch functions producing QRgba64 pixels, indexed by
// QImage::Format. Formats without a fetch function hold nullptr. Format_RGBX64
// and Format_RGBA64_Premultiplied use destFetchRGB64, which returns a pointer
// into the scan line; every other supported format goes through the generic,
// layout-driven destFetch64.
static DestFetchProc64 destFetchProc64[QImage::NImageFormats] =
{
    nullptr,            // Format_Invalid
    nullptr,            // Format_Mono,
    nullptr,            // Format_MonoLSB
    nullptr,            // Format_Indexed8
    destFetch64,        // Format_RGB32
    destFetch64,        // Format_ARGB32,
    destFetch64,        // Format_ARGB32_Premultiplied
    destFetch64,        // Format_RGB16
    destFetch64,        // Format_ARGB8565_Premultiplied
    destFetch64,        // Format_RGB666
    destFetch64,        // Format_ARGB6666_Premultiplied
    destFetch64,        // Format_RGB555
    destFetch64,        // Format_ARGB8555_Premultiplied
    destFetch64,        // Format_RGB888
    destFetch64,        // Format_RGB444
    destFetch64,        // Format_ARGB4444_Premultiplied
    destFetch64,        // Format_RGBX8888
    destFetch64,        // Format_RGBA8888
    destFetch64,        // Format_RGBA8888_Premultiplied
    destFetch64,        // Format_BGR30
    destFetch64,        // Format_A2BGR30_Premultiplied
    destFetch64,        // Format_RGB30
    destFetch64,        // Format_A2RGB30_Premultiplied
    destFetch64,        // Format_Alpha8
    destFetch64,        // Format_Grayscale8
    destFetchRGB64,     // Format_RGBX64
    destFetch64,        // Format_RGBA64
    destFetchRGB64,     // Format_RGBA64_Premultiplied
    destFetch64,        // Format_Grayscale16
    destFetch64,        // Format_BGR888
};
1798 #endif
1799 
1800 /*
1801    Returns the color in the mono destination color table
1802    that is the "nearest" to /color/.
1803 */
findNearestColor(QRgb color,QRasterBuffer * rbuf)1804 static inline QRgb findNearestColor(QRgb color, QRasterBuffer *rbuf)
1805 {
1806     QRgb color_0 = qPremultiply(rbuf->destColor0);
1807     QRgb color_1 = qPremultiply(rbuf->destColor1);
1808     color = qPremultiply(color);
1809 
1810     int r = qRed(color);
1811     int g = qGreen(color);
1812     int b = qBlue(color);
1813     int rx, gx, bx;
1814     int dist_0, dist_1;
1815 
1816     rx = r - qRed(color_0);
1817     gx = g - qGreen(color_0);
1818     bx = b - qBlue(color_0);
1819     dist_0 = rx*rx + gx*gx + bx*bx;
1820 
1821     rx = r - qRed(color_1);
1822     gx = g - qGreen(color_1);
1823     bx = b - qBlue(color_1);
1824     dist_1 = rx*rx + gx*gx + bx*bx;
1825 
1826     if (dist_0 < dist_1)
1827         return color_0;
1828     return color_1;
1829 }
1830 
1831 /*
1832   Destination store.
1833 */
1834 
destStoreMono(QRasterBuffer * rasterBuffer,int x,int y,const uint * buffer,int length)1835 static void QT_FASTCALL destStoreMono(QRasterBuffer *rasterBuffer, int x, int y, const uint *buffer, int length)
1836 {
1837     uchar *Q_DECL_RESTRICT data = (uchar *)rasterBuffer->scanLine(y);
1838     if (rasterBuffer->monoDestinationWithClut) {
1839         for (int i = 0; i < length; ++i) {
1840             if (buffer[i] == rasterBuffer->destColor0) {
1841                 data[x >> 3] &= ~(0x80 >> (x & 7));
1842             } else if (buffer[i] == rasterBuffer->destColor1) {
1843                 data[x >> 3] |= 0x80 >> (x & 7);
1844             } else if (findNearestColor(buffer[i], rasterBuffer) == rasterBuffer->destColor0) {
1845                 data[x >> 3] &= ~(0x80 >> (x & 7));
1846             } else {
1847                 data[x >> 3] |= 0x80 >> (x & 7);
1848             }
1849             ++x;
1850         }
1851     } else {
1852         for (int i = 0; i < length; ++i) {
1853             if (qGray(buffer[i]) < int(qt_bayer_matrix[y & 15][x & 15]))
1854                 data[x >> 3] |= 0x80 >> (x & 7);
1855             else
1856                 data[x >> 3] &= ~(0x80 >> (x & 7));
1857             ++x;
1858         }
1859     }
1860 }
1861 
destStoreMonoLsb(QRasterBuffer * rasterBuffer,int x,int y,const uint * buffer,int length)1862 static void QT_FASTCALL destStoreMonoLsb(QRasterBuffer *rasterBuffer, int x, int y, const uint *buffer, int length)
1863 {
1864     uchar *Q_DECL_RESTRICT data = (uchar *)rasterBuffer->scanLine(y);
1865     if (rasterBuffer->monoDestinationWithClut) {
1866         for (int i = 0; i < length; ++i) {
1867             if (buffer[i] == rasterBuffer->destColor0) {
1868                 data[x >> 3] &= ~(1 << (x & 7));
1869             } else if (buffer[i] == rasterBuffer->destColor1) {
1870                 data[x >> 3] |= 1 << (x & 7);
1871             } else if (findNearestColor(buffer[i], rasterBuffer) == rasterBuffer->destColor0) {
1872                 data[x >> 3] &= ~(1 << (x & 7));
1873             } else {
1874                 data[x >> 3] |= 1 << (x & 7);
1875             }
1876             ++x;
1877         }
1878     } else {
1879         for (int i = 0; i < length; ++i) {
1880             if (qGray(buffer[i]) < int(qt_bayer_matrix[y & 15][x & 15]))
1881                 data[x >> 3] |= 1 << (x & 7);
1882             else
1883                 data[x >> 3] &= ~(1 << (x & 7));
1884             ++x;
1885         }
1886     }
1887 }
1888 
destStoreRGB16(QRasterBuffer * rasterBuffer,int x,int y,const uint * buffer,int length)1889 static void QT_FASTCALL destStoreRGB16(QRasterBuffer *rasterBuffer, int x, int y, const uint *buffer, int length)
1890 {
1891     quint16 *data = (quint16*)rasterBuffer->scanLine(y) + x;
1892     for (int i = 0; i < length; ++i)
1893         data[i] = qConvertRgb32To16(buffer[i]);
1894 }
1895 
destStore(QRasterBuffer * rasterBuffer,int x,int y,const uint * buffer,int length)1896 static void QT_FASTCALL destStore(QRasterBuffer *rasterBuffer, int x, int y, const uint *buffer, int length)
1897 {
1898     const QPixelLayout *layout = &qPixelLayouts[rasterBuffer->format];
1899     ConvertAndStorePixelsFunc store = layout->storeFromARGB32PM;
1900     if (!layout->premultiplied && !layout->hasAlphaChannel)
1901         store = layout->storeFromRGB32;
1902     uchar *dest = rasterBuffer->scanLine(y);
1903     store(dest, buffer, x, length, nullptr, nullptr);
1904 }
1905 
// 32-bit destination store routines, one slot per image format: the array is
// sized by QImage::NImageFormats and every entry is labelled with the format
// it belongs to. The two mono formats and RGB16 have dedicated routines, and
// most other formats share the layout-driven destStore.
// Invalid, Indexed8, RGB32 and ARGB32_Premultiplied have no routine (nullptr).
// NOTE(review): presumably the 32-bit formats are blended in place and need no
// separate store step - confirm against the code that consumes this table.
static DestStoreProc destStoreProc[QImage::NImageFormats] =
{
    nullptr,            // Format_Invalid
    destStoreMono,      // Format_Mono,
    destStoreMonoLsb,   // Format_MonoLSB
    nullptr,            // Format_Indexed8
    nullptr,            // Format_RGB32
    destStore,          // Format_ARGB32,
    nullptr,            // Format_ARGB32_Premultiplied
    destStoreRGB16,     // Format_RGB16
    destStore,          // Format_ARGB8565_Premultiplied
    destStore,          // Format_RGB666
    destStore,          // Format_ARGB6666_Premultiplied
    destStore,          // Format_RGB555
    destStore,          // Format_ARGB8555_Premultiplied
    destStore,          // Format_RGB888
    destStore,          // Format_RGB444
    destStore,          // Format_ARGB4444_Premultiplied
    destStore,          // Format_RGBX8888
    destStore,          // Format_RGBA8888
    destStore,          // Format_RGBA8888_Premultiplied
    destStore,          // Format_BGR30
    destStore,          // Format_A2BGR30_Premultiplied
    destStore,          // Format_RGB30
    destStore,          // Format_A2RGB30_Premultiplied
    destStore,          // Format_Alpha8
    destStore,          // Format_Grayscale8
    destStore,          // Format_RGBX64
    destStore,          // Format_RGBA64
    destStore,          // Format_RGBA64_Premultiplied
    destStore,          // Format_Grayscale16
    destStore,          // Format_BGR888
};
1939 
1940 #if QT_CONFIG(raster_64bit)
destStore64(QRasterBuffer * rasterBuffer,int x,int y,const QRgba64 * buffer,int length)1941 static void QT_FASTCALL destStore64(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length)
1942 {
1943     auto store = qStoreFromRGBA64PM[rasterBuffer->format];
1944     uchar *dest = rasterBuffer->scanLine(y);
1945     store(dest, buffer, x, length, nullptr, nullptr);
1946 }
1947 
destStore64RGBA64(QRasterBuffer * rasterBuffer,int x,int y,const QRgba64 * buffer,int length)1948 static void QT_FASTCALL destStore64RGBA64(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length)
1949 {
1950     QRgba64 *dest = reinterpret_cast<QRgba64*>(rasterBuffer->scanLine(y)) + x;
1951     for (int i = 0; i < length; ++i) {
1952         dest[i] = buffer[i].unpremultiplied();
1953     }
1954 }
1955 
// 64-bit destination store routines, one slot per image format: the array is
// sized by QImage::NImageFormats and every entry is labelled with the format
// it belongs to. The first four formats have no routine (nullptr). RGBA64
// uses destStore64RGBA64, which unpremultiplies while storing; the other
// routine-backed formats go through the table-driven destStore64.
// RGBX64 and RGBA64_Premultiplied also have no routine; these are the two
// formats for which destFetchProc64 returns a pointer into the scanline.
// NOTE(review): presumably they are blended in place, so no store step is
// needed - confirm against the code that consumes this table.
static DestStoreProc64 destStoreProc64[QImage::NImageFormats] =
{
    nullptr,            // Format_Invalid
    nullptr,            // Format_Mono,
    nullptr,            // Format_MonoLSB
    nullptr,            // Format_Indexed8
    destStore64,        // Format_RGB32
    destStore64,        // Format_ARGB32,
    destStore64,        // Format_ARGB32_Premultiplied
    destStore64,        // Format_RGB16
    destStore64,        // Format_ARGB8565_Premultiplied
    destStore64,        // Format_RGB666
    destStore64,        // Format_ARGB6666_Premultiplied
    destStore64,        // Format_RGB555
    destStore64,        // Format_ARGB8555_Premultiplied
    destStore64,        // Format_RGB888
    destStore64,        // Format_RGB444
    destStore64,        // Format_ARGB4444_Premultiplied
    destStore64,        // Format_RGBX8888
    destStore64,        // Format_RGBA8888
    destStore64,        // Format_RGBA8888_Premultiplied
    destStore64,        // Format_BGR30
    destStore64,        // Format_A2BGR30_Premultiplied
    destStore64,        // Format_RGB30
    destStore64,        // Format_A2RGB30_Premultiplied
    destStore64,        // Format_Alpha8
    destStore64,        // Format_Grayscale8
    nullptr,            // Format_RGBX64
    destStore64RGBA64,  // Format_RGBA64
    nullptr,            // Format_RGBA64_Premultiplied
    destStore64,        // Format_Grayscale16
    destStore64,        // Format_BGR888
};
1989 #endif
1990 
1991 /*
1992   Source fetches
1993 
1994   This is a bit more complicated, as we need several fetch routines for every surface type
1995 
1996   We need 5 fetch methods per surface type:
1997   untransformed
1998   transformed (tiled and not tiled)
1999   transformed bilinear (tiled and not tiled)
2000 
2001   We don't need bounds checks for untransformed, but we need them for the other ones.
2002 
2003   The generic implementation does pixel by pixel fetches
2004 */
2005 
// Selects the source-fetch variant used for a texture. The enumerators are
// consecutive from zero, and NBlendTypes is the number of variants rather
// than a variant itself.
enum TextureBlendType {
    BlendUntransformed = 0,
    BlendTiled = 1,
    BlendTransformed = 2,
    BlendTransformedTiled = 3,
    BlendTransformedBilinear = 4,
    BlendTransformedBilinearTiled = 5,
    NBlendTypes = 6
};
2015 
fetchUntransformed(uint * buffer,const Operator *,const QSpanData * data,int y,int x,int length)2016 static const uint *QT_FASTCALL fetchUntransformed(uint *buffer, const Operator *,
2017                                                   const QSpanData *data, int y, int x, int length)
2018 {
2019     const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
2020     return layout->fetchToARGB32PM(buffer, data->texture.scanLine(y), x, length, data->texture.colorTable, nullptr);
2021 }
2022 
fetchUntransformedARGB32PM(uint *,const Operator *,const QSpanData * data,int y,int x,int)2023 static const uint *QT_FASTCALL fetchUntransformedARGB32PM(uint *, const Operator *,
2024                                                           const QSpanData *data, int y, int x, int)
2025 {
2026     const uchar *scanLine = data->texture.scanLine(y);
2027     return reinterpret_cast<const uint *>(scanLine) + x;
2028 }
2029 
fetchUntransformedRGB16(uint * buffer,const Operator *,const QSpanData * data,int y,int x,int length)2030 static const uint *QT_FASTCALL fetchUntransformedRGB16(uint *buffer, const Operator *,
2031                                                        const QSpanData *data, int y, int x,
2032                                                        int length)
2033 {
2034     const quint16 *scanLine = (const quint16 *)data->texture.scanLine(y) + x;
2035     for (int i = 0; i < length; ++i)
2036         buffer[i] = qConvertRgb16To32(scanLine[i]);
2037     return buffer;
2038 }
2039 
2040 #if QT_CONFIG(raster_64bit)
fetchUntransformed64(QRgba64 * buffer,const Operator *,const QSpanData * data,int y,int x,int length)2041 static const QRgba64 *QT_FASTCALL fetchUntransformed64(QRgba64 *buffer, const Operator *,
2042                                                        const QSpanData *data, int y, int x, int length)
2043 {
2044     const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
2045     return layout->fetchToRGBA64PM(buffer, data->texture.scanLine(y), x, length, data->texture.colorTable, nullptr);
2046 }
2047 
fetchUntransformedRGBA64PM(QRgba64 *,const Operator *,const QSpanData * data,int y,int x,int)2048 static const QRgba64 *QT_FASTCALL fetchUntransformedRGBA64PM(QRgba64 *, const Operator *,
2049                                                              const QSpanData *data, int y, int x, int)
2050 {
2051     const uchar *scanLine = data->texture.scanLine(y);
2052     return reinterpret_cast<const QRgba64 *>(scanLine) + x;
2053 }
2054 #endif
2055 
2056 template<TextureBlendType blendType>
fetchTransformed_pixelBounds(int max,int l1,int l2,int & v)2057 inline void fetchTransformed_pixelBounds(int max, int l1, int l2, int &v)
2058 {
2059     Q_STATIC_ASSERT(blendType == BlendTransformed || blendType == BlendTransformedTiled);
2060     if (blendType == BlendTransformedTiled) {
2061         if (v < 0 || v >= max) {
2062             v %= max;
2063             if (v < 0) v += max;
2064         }
2065     } else {
2066         v = qBound(l1, v, l2);
2067     }
2068 }
2069 
canUseFastMatrixPath(const qreal cx,const qreal cy,const qsizetype length,const QSpanData * data)2070 static inline bool canUseFastMatrixPath(const qreal cx, const qreal cy, const qsizetype length, const QSpanData *data)
2071 {
2072     if (Q_UNLIKELY(!data->fast_matrix))
2073         return false;
2074 
2075     qreal fx = (data->m21 * cy + data->m11 * cx + data->dx) * fixed_scale;
2076     qreal fy = (data->m22 * cy + data->m12 * cx + data->dy) * fixed_scale;
2077     qreal minc = std::min(fx, fy);
2078     qreal maxc = std::max(fx, fy);
2079     fx += std::trunc(data->m11 * fixed_scale) * length;
2080     fy += std::trunc(data->m12 * fixed_scale) * length;
2081     minc = std::min(minc, std::min(fx, fy));
2082     maxc = std::max(maxc, std::max(fx, fy));
2083 
2084     return minc >= std::numeric_limits<int>::min() && maxc <= std::numeric_limits<int>::max();
2085 }
2086 
// Transformed source fetch without interpolation: for each of the `length`
// destination pixels starting at (x, y), the pixel centre (x + 0.5, y + 0.5)
// is mapped through the matrix stored in `data`, and the single source pixel
// at the resulting integer coordinate is copied into `buffer`. No format
// conversion is done here.
//
// blendType selects how out-of-range coordinates are handled (see
// fetchTransformed_pixelBounds): BlendTransformedTiled wraps them,
// BlendTransformed clamps them.
// bpp and T select how a pixel is read. When bpp is below BPP32 and T has the
// size of uint, the pixel goes through a fetch function, looked up at run
// time for BPPNone. Otherwise it is read directly from the scanline as a T.
template<TextureBlendType blendType, QPixelLayout::BPP bpp, typename T>
static void QT_FASTCALL fetchTransformed_fetcher(T *buffer, const QSpanData *data,
                                                 int y, int x, int length)
{
    Q_STATIC_ASSERT(blendType == BlendTransformed || blendType == BlendTransformedTiled);
    const QTextureData &image = data->texture;

    // Sample at the centre of the destination pixel.
    const qreal cx = x + qreal(0.5);
    const qreal cy = y + qreal(0.5);

    constexpr bool useFetch = (bpp < QPixelLayout::BPP32) && sizeof(T) == sizeof(uint);
    const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
    if (!useFetch)
        Q_ASSERT(layout->bpp == bpp);
    // When templated 'fetch' should be inlined at compile time:
    const FetchPixelFunc fetch = (bpp == QPixelLayout::BPPNone) ? qFetchPixel[layout->bpp] : FetchPixelFunc(fetchPixel<bpp>);

    if (canUseFastMatrixPath(cx, cy, length, data)) {
        // The increment per x in the scanline (fixed point; the pixel index
        // is taken with >> 16 below)
        int fdx = (int)(data->m11 * fixed_scale);
        int fdy = (int)(data->m12 * fixed_scale);

        int fx = int((data->m21 * cy
                      + data->m11 * cx + data->dx) * fixed_scale);
        int fy = int((data->m22 * cy
                      + data->m12 * cx + data->dy) * fixed_scale);

        if (fdy == 0) { // simple scale, no rotation or shear
            // The source row is constant for the whole span.
            int py = (fy >> 16);
            fetchTransformed_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, py);
            const uchar *src = image.scanLine(py);

            int i = 0;
            if (blendType == BlendTransformed) {
                // fastLen limits the span to the steps taken before fx passes
                // the last valid column in the direction of travel.
                int fastLen = length;
                if (fdx > 0)
                    fastLen = qMin(fastLen, int((qint64(image.x2 - 1) * fixed_scale - fx) / fdx));
                else if (fdx < 0)
                    fastLen = qMin(fastLen, int((qint64(image.x1) * fixed_scale - fx) / fdx));

                // Leading part: clamp until a sample needs no clamping.
                for (; i < fastLen; ++i) {
                    int x1 = (fx >> 16);
                    int x2 = x1;
                    fetchTransformed_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1);
                    if (x1 == x2)
                        break;
                    if (useFetch)
                        buffer[i] = fetch(src, x1);
                    else
                        buffer[i] = reinterpret_cast<const T*>(src)[x1];
                    fx += fdx;
                }

                // Middle part: fetched without bounds checks.
                for (; i < fastLen; ++i) {
                    int px = (fx >> 16);
                    if (useFetch)
                        buffer[i] = fetch(src, px);
                    else
                        buffer[i] = reinterpret_cast<const T*>(src)[px];
                    fx += fdx;
                }
            }

            // Remaining pixels (all of them when tiled): bounds-checked fetch.
            for (; i < length; ++i) {
                int px = (fx >> 16);
                fetchTransformed_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, px);
                if (useFetch)
                    buffer[i] = fetch(src, px);
                else
                    buffer[i] = reinterpret_cast<const T*>(src)[px];
                fx += fdx;
            }
        } else { // rotation or shear
            int i = 0;
            if (blendType == BlendTransformed) {
                // Same three-part scheme as above, limited in both x and y.
                int fastLen = length;
                if (fdx > 0)
                    fastLen = qMin(fastLen, int((qint64(image.x2 - 1) * fixed_scale - fx) / fdx));
                else if (fdx < 0)
                    fastLen = qMin(fastLen, int((qint64(image.x1) * fixed_scale - fx) / fdx));
                if (fdy > 0)
                    fastLen = qMin(fastLen, int((qint64(image.y2 - 1) * fixed_scale - fy) / fdy));
                else if (fdy < 0)
                    fastLen = qMin(fastLen, int((qint64(image.y1) * fixed_scale - fy) / fdy));

                for (; i < fastLen; ++i) {
                    int x1 = (fx >> 16);
                    int y1 = (fy >> 16);
                    int x2 = x1;
                    int y2 = y1;
                    fetchTransformed_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1);
                    fetchTransformed_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1);
                    if (x1 == x2 && y1 == y2)
                        break;
                    if (useFetch)
                        buffer[i] = fetch(image.scanLine(y1), x1);
                    else
                        buffer[i] = reinterpret_cast<const T*>(image.scanLine(y1))[x1];
                    fx += fdx;
                    fy += fdy;
                }

                for (; i < fastLen; ++i) {
                    int px = (fx >> 16);
                    int py = (fy >> 16);
                    if (useFetch)
                        buffer[i] = fetch(image.scanLine(py), px);
                    else
                        buffer[i] = reinterpret_cast<const T*>(image.scanLine(py))[px];
                    fx += fdx;
                    fy += fdy;
                }
            }

            for (; i < length; ++i) {
                int px = (fx >> 16);
                int py = (fy >> 16);
                fetchTransformed_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, px);
                fetchTransformed_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, py);
                if (useFetch)
                    buffer[i] = fetch(image.scanLine(py), px);
                else
                    buffer[i] = reinterpret_cast<const T*>(image.scanLine(py))[px];
                fx += fdx;
                fy += fdy;
            }
        }
    } else {
        // General path: floating-point coordinates with a per-pixel divide by
        // the third homogeneous component fw.
        const qreal fdx = data->m11;
        const qreal fdy = data->m12;
        const qreal fdw = data->m13;

        qreal fx = data->m21 * cy + data->m11 * cx + data->dx;
        qreal fy = data->m22 * cy + data->m12 * cx + data->dy;
        qreal fw = data->m23 * cy + data->m13 * cx + data->m33;

        T *const end = buffer + length;
        T *b = buffer;
        while (b < end) {
            // A zero fw is treated as 1 so the division is always defined.
            const qreal iw = fw == 0 ? 1 : 1 / fw;
            const qreal tx = fx * iw;
            const qreal ty = fy * iw;
            int px = qFloor(tx);
            int py = qFloor(ty);

            fetchTransformed_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, py);
            fetchTransformed_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, px);
            if (useFetch)
                *b = fetch(image.scanLine(py), px);
            else
                *b = reinterpret_cast<const T*>(image.scanLine(py))[px];

            fx += fdx;
            fy += fdy;
            fw += fdw;
            //force increment to avoid /0
            if (!fw) {
                fw += fdw;
            }
            ++b;
        }
    }
}
2250 
2251 template<TextureBlendType blendType, QPixelLayout::BPP bpp>
fetchTransformed(uint * buffer,const Operator *,const QSpanData * data,int y,int x,int length)2252 static const uint *QT_FASTCALL fetchTransformed(uint *buffer, const Operator *, const QSpanData *data,
2253                                                 int y, int x, int length)
2254 {
2255     Q_STATIC_ASSERT(blendType == BlendTransformed || blendType == BlendTransformedTiled);
2256     const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
2257     fetchTransformed_fetcher<blendType, bpp, uint>(buffer, data, y, x, length);
2258     layout->convertToARGB32PM(buffer, length, data->texture.colorTable);
2259     return buffer;
2260 }
2261 
2262 #if QT_CONFIG(raster_64bit)
2263 template<TextureBlendType blendType>  /* either BlendTransformed or BlendTransformedTiled */
fetchTransformed64(QRgba64 * buffer,const Operator *,const QSpanData * data,int y,int x,int length)2264 static const QRgba64 *QT_FASTCALL fetchTransformed64(QRgba64 *buffer, const Operator *, const QSpanData *data,
2265                                                      int y, int x, int length)
2266 {
2267     const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
2268     if (layout->bpp != QPixelLayout::BPP64) {
2269         uint buffer32[BufferSize];
2270         Q_ASSERT(length <= BufferSize);
2271         if (layout->bpp == QPixelLayout::BPP32)
2272             fetchTransformed_fetcher<blendType, QPixelLayout::BPP32, uint>(buffer32, data, y, x, length);
2273         else
2274             fetchTransformed_fetcher<blendType, QPixelLayout::BPPNone, uint>(buffer32, data, y, x, length);
2275         return layout->convertToRGBA64PM(buffer, buffer32, length, data->texture.colorTable, nullptr);
2276     }
2277 
2278     fetchTransformed_fetcher<blendType, QPixelLayout::BPP64, QRgba64>(buffer, data, y, x, length);
2279     if (data->texture.format == QImage::Format_RGBA64)
2280         convertRGBA64ToRGBA64PM(buffer, length);
2281     return buffer;
2282 }
2283 #endif
2284 
/** \internal
  Bilinearly blends the four argb pixels tl, tr, bl and br.
  distx and disty are the horizontal and vertical weights and must be between
  0 and 16; (0, 0) yields tl and (16, 16) yields br. The four weights always
  add up to 256, so the channel sums are scaled back by 8 bits.
 */
static inline uint interpolate_4_pixels_16(uint tl, uint tr, uint bl, uint br, uint distx, uint disty)
{
    // Per-corner weights, written without the inverse distances:
    //   (16-distx) * disty      = 16*disty - distx*disty
    //   (16-distx) * (16-disty) = 16*16 - 16*distx - 16*disty + distx*disty
    const uint weightBR = distx * disty;
    const uint weightTR = distx*16 - weightBR;
    const uint weightBL = disty*16 - weightBR;
    const uint weightTL = 16*16 - 16*distx - 16*disty + weightBR;

    const uint maskRB = 0x00ff00ff;
    const uint maskAG = 0xff00ff00;

    // Red/blue and alpha/green are processed as two pairs of 8-bit channels,
    // each channel sitting in its own 16-bit lane.
    const uint sumRB = (tl & maskRB) * weightTL
                     + (tr & maskRB) * weightTR
                     + (bl & maskRB) * weightBL
                     + (br & maskRB) * weightBR;
    const uint sumAG = ((tl & maskAG) >> 8) * weightTL
                     + ((tr & maskAG) >> 8) * weightTR
                     + ((bl & maskAG) >> 8) * weightBL
                     + ((br & maskAG) >> 8) * weightBR;

    return ((sumRB >> 8) & maskRB) | (sumAG & maskAG);
}
2304 
2305 #if defined(__SSE2__)
/*
   SSE2 counterpart of interpolate_4_pixels_16(), working on __m128i vectors
   of packed pixels.

   tl, tr, bl, br: the four neighbouring pixel vectors.
   distx, disty:   weights in 16-bit lanes; they are shifted left by 4 here to
                   form the 16*dist terms.
   colorMask:      selects the low byte of each 16-bit lane (the RB part); its
                   complement is applied to the AG result. Presumably
                   0x00ff00ff per pixel - confirm at the call sites.
   v_256:          constant the combined weights are subtracted from;
                   presumably 256 in every 16-bit lane - confirm at the call
                   sites.
   b:              destination address, written with an unaligned store.

   Each input is split into an AG half (16-bit lanes shifted right by 8) and
   an RB half (masked with colorMask). The halves are multiplied by the four
   corner weights with 16-bit multiplies and summed, then recombined: AG keeps
   its high bytes, RB is shifted right by 8.
*/
#define interpolate_4_pixels_16_sse2(tl, tr, bl, br, distx, disty, colorMask, v_256, b)  \
{ \
    const __m128i dxdy = _mm_mullo_epi16 (distx, disty); \
    const __m128i distx_ = _mm_slli_epi16(distx, 4); \
    const __m128i disty_ = _mm_slli_epi16(disty, 4); \
    const __m128i idxidy =  _mm_add_epi16(dxdy, _mm_sub_epi16(v_256, _mm_add_epi16(distx_, disty_))); \
    const __m128i dxidy =  _mm_sub_epi16(distx_, dxdy); \
    const __m128i idxdy =  _mm_sub_epi16(disty_, dxdy); \
 \
    __m128i tlAG = _mm_srli_epi16(tl, 8); \
    __m128i tlRB = _mm_and_si128(tl, colorMask); \
    __m128i trAG = _mm_srli_epi16(tr, 8); \
    __m128i trRB = _mm_and_si128(tr, colorMask); \
    __m128i blAG = _mm_srli_epi16(bl, 8); \
    __m128i blRB = _mm_and_si128(bl, colorMask); \
    __m128i brAG = _mm_srli_epi16(br, 8); \
    __m128i brRB = _mm_and_si128(br, colorMask); \
 \
    tlAG = _mm_mullo_epi16(tlAG, idxidy); \
    tlRB = _mm_mullo_epi16(tlRB, idxidy); \
    trAG = _mm_mullo_epi16(trAG, dxidy); \
    trRB = _mm_mullo_epi16(trRB, dxidy); \
    blAG = _mm_mullo_epi16(blAG, idxdy); \
    blRB = _mm_mullo_epi16(blRB, idxdy); \
    brAG = _mm_mullo_epi16(brAG, dxdy); \
    brRB = _mm_mullo_epi16(brRB, dxdy); \
 \
    /* Add the values, and shift to only keep 8 significant bits per colors */ \
    __m128i rAG =_mm_add_epi16(_mm_add_epi16(tlAG, trAG), _mm_add_epi16(blAG, brAG)); \
    __m128i rRB =_mm_add_epi16(_mm_add_epi16(tlRB, trRB), _mm_add_epi16(blRB, brRB)); \
    rAG = _mm_andnot_si128(colorMask, rAG); \
    rRB = _mm_srli_epi16(rRB, 8); \
    _mm_storeu_si128((__m128i*)(b), _mm_or_si128(rAG, rRB)); \
}
2340 #endif
2341 
2342 #if defined(__ARM_NEON__)
/*
   NEON counterpart of interpolate_4_pixels_16(), working on int16x8_t vectors
   of packed pixels.

   tl, tr, bl, br: the four neighbouring pixel vectors.
   distx, disty:   weights in 16-bit lanes; distx is shifted left by 4 here.
   disty_:         used where the SSE2 variant uses disty shifted left by 4;
                   presumably the caller passes that pre-shifted value -
                   confirm at the call sites.
   colorMask:      selects the RB part of each lane; invColorMask is applied
                   to the AG result. Presumably 0x00ff00ff per pixel and its
                   complement - confirm at the call sites.
   v_256:          constant the combined weights are subtracted from;
                   presumably 256 in every 16-bit lane - confirm at the call
                   sites.
   b:              destination address, written with vst1q_s16.

   Each input is split into an AG half (lanes shifted right by 8 as unsigned)
   and an RB half (masked with colorMask). The weighted sum is accumulated
   with multiply-accumulate, then AG is masked with invColorMask, RB is
   shifted right by 8, and the two are ORed together.
*/
#define interpolate_4_pixels_16_neon(tl, tr, bl, br, distx, disty, disty_, colorMask, invColorMask, v_256, b)  \
{ \
    const int16x8_t dxdy = vmulq_s16(distx, disty); \
    const int16x8_t distx_ = vshlq_n_s16(distx, 4); \
    const int16x8_t idxidy =  vaddq_s16(dxdy, vsubq_s16(v_256, vaddq_s16(distx_, disty_))); \
    const int16x8_t dxidy =  vsubq_s16(distx_, dxdy); \
    const int16x8_t idxdy =  vsubq_s16(disty_, dxdy); \
 \
    int16x8_t tlAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(tl), 8)); \
    int16x8_t tlRB = vandq_s16(tl, colorMask); \
    int16x8_t trAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(tr), 8)); \
    int16x8_t trRB = vandq_s16(tr, colorMask); \
    int16x8_t blAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(bl), 8)); \
    int16x8_t blRB = vandq_s16(bl, colorMask); \
    int16x8_t brAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(br), 8)); \
    int16x8_t brRB = vandq_s16(br, colorMask); \
 \
    int16x8_t rAG = vmulq_s16(tlAG, idxidy); \
    int16x8_t rRB = vmulq_s16(tlRB, idxidy); \
    rAG = vmlaq_s16(rAG, trAG, dxidy); \
    rRB = vmlaq_s16(rRB, trRB, dxidy); \
    rAG = vmlaq_s16(rAG, blAG, idxdy); \
    rRB = vmlaq_s16(rRB, blRB, idxdy); \
    rAG = vmlaq_s16(rAG, brAG, dxdy); \
    rRB = vmlaq_s16(rRB, brRB, dxdy); \
 \
    rAG = vandq_s16(invColorMask, rAG); \
    rRB = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(rRB), 8)); \
    vst1q_s16((int16_t*)(b), vorrq_s16(rAG, rRB)); \
}
2373 #endif
2374 
2375 template<TextureBlendType blendType>
2376 void fetchTransformedBilinear_pixelBounds(int max, int l1, int l2, int &v1, int &v2);
2377 
2378 template<>
fetchTransformedBilinear_pixelBounds(int max,int,int,int & v1,int & v2)2379 inline void fetchTransformedBilinear_pixelBounds<BlendTransformedBilinearTiled>(int max, int, int, int &v1, int &v2)
2380 {
2381     v1 %= max;
2382     if (v1 < 0)
2383         v1 += max;
2384     v2 = v1 + 1;
2385     if (v2 == max)
2386         v2 = 0;
2387     Q_ASSERT(v1 >= 0 && v1 < max);
2388     Q_ASSERT(v2 >= 0 && v2 < max);
2389 }
2390 
2391 template<>
fetchTransformedBilinear_pixelBounds(int,int l1,int l2,int & v1,int & v2)2392 inline void fetchTransformedBilinear_pixelBounds<BlendTransformedBilinear>(int, int l1, int l2, int &v1, int &v2)
2393 {
2394     if (v1 < l1)
2395         v2 = v1 = l1;
2396     else if (v1 >= l2)
2397         v2 = v1 = l2;
2398     else
2399         v2 = v1 + 1;
2400     Q_ASSERT(v1 >= l1 && v1 <= l2);
2401     Q_ASSERT(v2 >= l1 && v2 <= l2);
2402 }
2403 
// Kinds of transform distinguished by the fast bilinear helpers. The
// enumerators are consecutive from zero, and NFastTransformTypes is the
// number of kinds rather than a kind itself.
enum FastTransformTypes {
    SimpleScaleTransform = 0,
    UpscaleTransform = 1,
    DownscaleTransform = 2,
    RotateTransform = 3,
    FastRotateTransform = 4,
    NFastTransformTypes = 5
};
2412 
2413 // Completes the partial interpolation stored in IntermediateBuffer.
2414 // by performing the x-axis interpolation and joining the RB and AG buffers.
intermediate_adder(uint * b,uint * end,const IntermediateBuffer & intermediate,int offset,int & fx,int fdx)2415 static void QT_FASTCALL intermediate_adder(uint *b, uint *end, const IntermediateBuffer &intermediate, int offset, int &fx, int fdx)
2416 {
2417 #if defined(QT_COMPILER_SUPPORTS_AVX2)
2418     extern void QT_FASTCALL intermediate_adder_avx2(uint *b, uint *end, const IntermediateBuffer &intermediate, int offset, int &fx, int fdx);
2419     if (qCpuHasFeature(ArchHaswell))
2420         return intermediate_adder_avx2(b, end, intermediate, offset, fx, fdx);
2421 #endif
2422 
2423     // Switch to intermediate buffer coordinates
2424     fx -= offset * fixed_scale;
2425 
2426     while (b < end) {
2427         const int x = (fx >> 16);
2428 
2429         const uint distx = (fx & 0x0000ffff) >> 8;
2430         const uint idistx = 256 - distx;
2431         const uint rb = (intermediate.buffer_rb[x] * idistx + intermediate.buffer_rb[x + 1] * distx) & 0xff00ff00;
2432         const uint ag = (intermediate.buffer_ag[x] * idistx + intermediate.buffer_ag[x + 1] * distx) & 0xff00ff00;
2433         *b = (rb >> 8) | ag;
2434         b++;
2435         fx += fdx;
2436     }
2437     fx += offset * fixed_scale;
2438 }
2439 
2440 typedef void (QT_FASTCALL *BilinearFastTransformHelper)(uint *b, uint *end, const QTextureData &image, int &fx, int &fy, int fdx, int fdy);
2441 
// Fast-path bilinear fetch for a horizontal-only transform (fdy is unused).
// It works in two passes: the two source scanlines selected by fy are first
// blended vertically into an IntermediateBuffer (red/blue and alpha/green
// components stored separately), then intermediate_adder() performs the
// horizontal interpolation and writes the pixels to [b, end).
// fx is advanced through intermediate_adder(); fy is only read.
template<TextureBlendType blendType>
static void QT_FASTCALL fetchTransformedBilinearARGB32PM_simple_scale_helper(uint *b, uint *end, const QTextureData &image,
                                                                             int &fx, int &fy, int fdx, int /*fdy*/)
{
    // Select the two source rows to blend.
    int y1 = (fy >> 16);
    int y2;
    fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
    const uint *s1 = (const uint *)image.scanLine(y1);
    const uint *s2 = (const uint *)image.scanLine(y2);

    // Vertical weights out of 256, from the top 8 bits of fy's fractional part.
    const int disty = (fy & 0x0000ffff) >> 8;
    const int idisty = 256 - disty;
    const int length = end - b;

    // The intermediate buffer is generated in the positive direction, so for a
    // negative step start from the position reached after 'length' steps.
    const int adjust = (fdx < 0) ? fdx * length : 0;
    const int offset = (fx + adjust) >> 16;
    int x = offset;

    IntermediateBuffer intermediate;
    // count is the size used in the intermediate.buffer.
    int count = (qint64(length) * qAbs(fdx) + fixed_scale - 1) / fixed_scale + 2;
    // length is supposed to be <= BufferSize either because data->m11 < 1 or
    // data->m11 < 2, and any larger buffers split
    Q_ASSERT(count <= BufferSize + 2);
    int f = 0;          // next index to fill in the intermediate buffers
    int lim = count;    // upper bound on f for the loops that read s1/s2 without clamping x
    if (blendType == BlendTransformedBilinearTiled) {
        // Wrap the start column into [0, image.width).
        x %= image.width;
        if (x < 0) x += image.width;
    } else {
        // Entries from lim onwards would read at or beyond image.x2.
        lim = qMin(count, image.x2 - x);
        if (x < image.x1) {
            Q_ASSERT(x < image.x2);
            // Positions left of image.x1 all get the blended image.x1 column.
            // Note: this local 'b' shadows the destination pointer parameter.
            uint t = s1[image.x1];
            uint b = s2[image.x1];
            quint32 rb = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff;
            quint32 ag = ((((t>>8) & 0xff00ff) * idisty + ((b>>8) & 0xff00ff) * disty) >> 8) & 0xff00ff;
            do {
                intermediate.buffer_rb[f] = rb;
                intermediate.buffer_ag[f] = ag;
                f++;
                x++;
            } while (x < image.x1 && f < lim);
        }
    }

    // Vectorised vertical blend, four pixels per iteration (non-tiled only).
    if (blendType != BlendTransformedBilinearTiled) {
#if defined(__SSE2__)
        const __m128i disty_ = _mm_set1_epi16(disty);
        const __m128i idisty_ = _mm_set1_epi16(idisty);
        const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);

        // Stop early enough that a full group of four entries stays below lim.
        lim -= 3;
        for (; f < lim; x += 4, f += 4) {
            // Load 4 pixels from s1, and split the alpha-green and red-blue component
            __m128i top = _mm_loadu_si128((const __m128i*)((const uint *)(s1)+x));
            __m128i topAG = _mm_srli_epi16(top, 8);
            __m128i topRB = _mm_and_si128(top, colorMask);
            // Multiplies each color component by idisty
            topAG = _mm_mullo_epi16 (topAG, idisty_);
            topRB = _mm_mullo_epi16 (topRB, idisty_);

            // Same for the s2 vector
            __m128i bottom = _mm_loadu_si128((const __m128i*)((const uint *)(s2)+x));
            __m128i bottomAG = _mm_srli_epi16(bottom, 8);
            __m128i bottomRB = _mm_and_si128(bottom, colorMask);
            bottomAG = _mm_mullo_epi16 (bottomAG, disty_);
            bottomRB = _mm_mullo_epi16 (bottomRB, disty_);

            // Add the values, and shift to only keep 8 significant bits per colors
            __m128i rAG =_mm_add_epi16(topAG, bottomAG);
            rAG = _mm_srli_epi16(rAG, 8);
            _mm_storeu_si128((__m128i*)(&intermediate.buffer_ag[f]), rAG);
            __m128i rRB =_mm_add_epi16(topRB, bottomRB);
            rRB = _mm_srli_epi16(rRB, 8);
            _mm_storeu_si128((__m128i*)(&intermediate.buffer_rb[f]), rRB);
        }
#elif defined(__ARM_NEON__)
        const int16x8_t disty_ = vdupq_n_s16(disty);
        const int16x8_t idisty_ = vdupq_n_s16(idisty);
        const int16x8_t colorMask = vdupq_n_s16(0x00ff);

        // Stop early enough that a full group of four entries stays below lim.
        lim -= 3;
        for (; f < lim; x += 4, f += 4) {
            // Load 4 pixels from s1, and split the alpha-green and red-blue component
            int16x8_t top = vld1q_s16((int16_t*)((const uint *)(s1)+x));
            int16x8_t topAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(top), 8));
            int16x8_t topRB = vandq_s16(top, colorMask);
            // Multiplies each color component by idisty
            topAG = vmulq_s16(topAG, idisty_);
            topRB = vmulq_s16(topRB, idisty_);

            // Same for the s2 vector
            int16x8_t bottom = vld1q_s16((int16_t*)((const uint *)(s2)+x));
            int16x8_t bottomAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(bottom), 8));
            int16x8_t bottomRB = vandq_s16(bottom, colorMask);
            bottomAG = vmulq_s16(bottomAG, disty_);
            bottomRB = vmulq_s16(bottomRB, disty_);

            // Add the values, and shift to only keep 8 significant bits per colors
            int16x8_t rAG = vaddq_s16(topAG, bottomAG);
            rAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(rAG), 8));
            vst1q_s16((int16_t*)(&intermediate.buffer_ag[f]), rAG);
            int16x8_t rRB = vaddq_s16(topRB, bottomRB);
            rRB = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(rRB), 8));
            vst1q_s16((int16_t*)(&intermediate.buffer_rb[f]), rRB);
        }
#endif
    }
    // Scalar loop for everything not filled yet: the whole buffer in tiled mode
    // or without SIMD, otherwise the remainder up to count.
    for (; f < count; f++) { // Same as above but without simd
        if (blendType == BlendTransformedBilinearTiled) {
            // Wrap around at the right edge of the tile.
            if (x >= image.width) x -= image.width;
        } else {
            // Clamp to the last valid column.
            x = qMin(x, image.x2 - 1);
        }

        // As in the padding code above, this 'b' shadows the destination pointer.
        uint t = s1[x];
        uint b = s2[x];

        intermediate.buffer_rb[f] = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff;
        intermediate.buffer_ag[f] = ((((t>>8) & 0xff00ff) * idisty + ((b>>8) & 0xff00ff) * disty) >> 8) & 0xff00ff;
        x++;
    }

    // Now interpolate the values from the intermediate.buffer to get the final result.
    intermediate_adder(b, end, intermediate, offset, fx, fdx);
}
2570 
2571 template<TextureBlendType blendType>
fetchTransformedBilinearARGB32PM_upscale_helper(uint * b,uint * end,const QTextureData & image,int & fx,int & fy,int fdx,int)2572 static void QT_FASTCALL fetchTransformedBilinearARGB32PM_upscale_helper(uint *b, uint *end, const QTextureData &image,
2573                                                                         int &fx, int &fy, int fdx, int /*fdy*/)
2574 {
2575     int y1 = (fy >> 16);
2576     int y2;
2577     fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
2578     const uint *s1 = (const uint *)image.scanLine(y1);
2579     const uint *s2 = (const uint *)image.scanLine(y2);
2580     const int disty = (fy & 0x0000ffff) >> 8;
2581 
2582     if (blendType != BlendTransformedBilinearTiled) {
2583         const qint64 min_fx = qint64(image.x1) * fixed_scale;
2584         const qint64 max_fx = qint64(image.x2 - 1) * fixed_scale;
2585         while (b < end) {
2586             int x1 = (fx >> 16);
2587             int x2;
2588             fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
2589             if (x1 != x2)
2590                 break;
2591             uint top = s1[x1];
2592             uint bot = s2[x1];
2593             *b = INTERPOLATE_PIXEL_256(top, 256 - disty, bot, disty);
2594             fx += fdx;
2595             ++b;
2596         }
2597         uint *boundedEnd = end;
2598         if (fdx > 0)
2599             boundedEnd = qMin(boundedEnd, b + (max_fx - fx) / fdx);
2600         else if (fdx < 0)
2601             boundedEnd = qMin(boundedEnd, b + (min_fx - fx) / fdx);
2602 
2603         // A fast middle part without boundary checks
2604         while (b < boundedEnd) {
2605             int x = (fx >> 16);
2606             int distx = (fx & 0x0000ffff) >> 8;
2607             *b = interpolate_4_pixels(s1 + x, s2 + x, distx, disty);
2608             fx += fdx;
2609             ++b;
2610         }
2611     }
2612 
2613     while (b < end) {
2614         int x1 = (fx >> 16);
2615         int x2;
2616         fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1 , x1, x2);
2617         uint tl = s1[x1];
2618         uint tr = s1[x2];
2619         uint bl = s2[x1];
2620         uint br = s2[x2];
2621         int distx = (fx & 0x0000ffff) >> 8;
2622         *b = interpolate_4_pixels(tl, tr, bl, br, distx, disty);
2623 
2624         fx += fdx;
2625         ++b;
2626     }
2627 }
2628 
// Fast-path bilinear fetch for a horizontal-only transform (fdy is unused) that
// samples the two source scanlines directly for every destination pixel.
//
// For the non-tiled blend type the span is processed in phases:
//   1. leading pixels whose two source columns collapse into one (vertical
//      interpolation only),
//   2. a middle part without bounds checks, four pixels at a time with
//      SSE2/NEON where available, then one at a time,
//   3. the remaining pixels with per-pixel bounds handling.
// The tiled blend type only uses phase 3.
// The SIMD code and the non-hasFastInterpolate4() scalar code use 4-bit weights
// (distx4/disty4); otherwise 8-bit weights (distx8/disty8) are used.
// fx is left at the position following the last pixel written; fy is only read.
template<TextureBlendType blendType>
static void QT_FASTCALL fetchTransformedBilinearARGB32PM_downscale_helper(uint *b, uint *end, const QTextureData &image,
                                                                          int &fx, int &fy, int fdx, int /*fdy*/)
{
    int y1 = (fy >> 16);
    int y2;
    fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
    const uint *s1 = (const uint *)image.scanLine(y1);
    const uint *s2 = (const uint *)image.scanLine(y2);
    // Vertical weight with 8 bits of precision, and the same rounded to 4 bits.
    const int disty8 = (fy & 0x0000ffff) >> 8;
    const int disty4 = (disty8 + 0x08) >> 4;

    if (blendType != BlendTransformedBilinearTiled) {
        // Fixed-point limits of the range in which x and x + 1 are both valid columns.
        const qint64 min_fx = qint64(image.x1) * fixed_scale;
        const qint64 max_fx = qint64(image.x2 - 1) * fixed_scale;
        // Phase 1: pixelBounds returned a single column, blend vertically only.
        while (b < end) {
            int x1 = (fx >> 16);
            int x2;
            fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
            if (x1 != x2)
                break;
            uint top = s1[x1];
            uint bot = s2[x1];
            *b = INTERPOLATE_PIXEL_256(top, 256 - disty8, bot, disty8);
            fx += fdx;
            ++b;
        }
        // Number of steps that can be taken before fx crosses max_fx / min_fx.
        uint *boundedEnd = end;
        if (fdx > 0)
            boundedEnd = qMin(boundedEnd, b + (max_fx - fx) / fdx);
        else if (fdx < 0)
            boundedEnd = qMin(boundedEnd, b + (min_fx - fx) / fdx);
        // A fast middle part without boundary checks
#if defined(__SSE2__)
        const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
        const __m128i v_256 = _mm_set1_epi16(256);
        const __m128i v_disty = _mm_set1_epi16(disty4);
        const __m128i v_fdx = _mm_set1_epi32(fdx*4);
        const __m128i v_fx_r = _mm_set1_epi32(0x8);
        // Source positions of four consecutive destination pixels.
        __m128i v_fx = _mm_setr_epi32(fx, fx + fdx, fx + fdx + fdx, fx + fdx + fdx + fdx);

        while (b < boundedEnd - 3) {
            // Extract the four integer column indices and gather the 2x2 neighbourhoods.
            __m128i offset = _mm_srli_epi32(v_fx, 16);
            const int offset0 = _mm_cvtsi128_si32(offset); offset = _mm_srli_si128(offset, 4);
            const int offset1 = _mm_cvtsi128_si32(offset); offset = _mm_srli_si128(offset, 4);
            const int offset2 = _mm_cvtsi128_si32(offset); offset = _mm_srli_si128(offset, 4);
            const int offset3 = _mm_cvtsi128_si32(offset);
            const __m128i tl = _mm_setr_epi32(s1[offset0], s1[offset1], s1[offset2], s1[offset3]);
            const __m128i tr = _mm_setr_epi32(s1[offset0 + 1], s1[offset1 + 1], s1[offset2 + 1], s1[offset3 + 1]);
            const __m128i bl = _mm_setr_epi32(s2[offset0], s2[offset1], s2[offset2], s2[offset3]);
            const __m128i br = _mm_setr_epi32(s2[offset0 + 1], s2[offset1 + 1], s2[offset2 + 1], s2[offset3 + 1]);

            // Rounded 4-bit horizontal weight from the fraction of each position,
            // duplicated into both 16-bit halves of its 32-bit lane.
            __m128i v_distx = _mm_srli_epi16(v_fx, 8);
            v_distx = _mm_srli_epi16(_mm_add_epi32(v_distx, v_fx_r), 4);
            v_distx = _mm_shufflehi_epi16(v_distx, _MM_SHUFFLE(2,2,0,0));
            v_distx = _mm_shufflelo_epi16(v_distx, _MM_SHUFFLE(2,2,0,0));

            interpolate_4_pixels_16_sse2(tl, tr, bl, br, v_distx, v_disty, colorMask, v_256, b);
            b += 4;
            v_fx = _mm_add_epi32(v_fx, v_fdx);
        }
        // Hand the position of the next pixel back to the scalar code.
        fx = _mm_cvtsi128_si32(v_fx);
#elif defined(__ARM_NEON__)
        const int16x8_t colorMask = vdupq_n_s16(0x00ff);
        const int16x8_t invColorMask = vmvnq_s16(colorMask);
        const int16x8_t v_256 = vdupq_n_s16(256);
        const int16x8_t v_disty = vdupq_n_s16(disty4);
        const int16x8_t v_disty_ = vshlq_n_s16(v_disty, 4);
        int32x4_t v_fdx = vdupq_n_s32(fdx*4);

        // Source positions of four consecutive destination pixels; kept in step
        // with the scalar fx, which is advanced while loading below.
        int32x4_t v_fx = vmovq_n_s32(fx);
        v_fx = vsetq_lane_s32(fx + fdx, v_fx, 1);
        v_fx = vsetq_lane_s32(fx + fdx * 2, v_fx, 2);
        v_fx = vsetq_lane_s32(fx + fdx * 3, v_fx, 3);

        const int32x4_t v_ffff_mask = vdupq_n_s32(0x0000ffff);
        const int32x4_t v_fx_r = vdupq_n_s32(0x0800);

        while (b < boundedEnd - 3) {
            // NOTE(review): declared without initialiser; every lane is loaded
            // below before the vectors are used.
            uint32x4x2_t v_top, v_bot;

            // Each vld2q_lane_u32 loads the pixel at x1 and its right neighbour
            // into the given lane of val[0] and val[1] respectively.
            int x1 = (fx >> 16);
            fx += fdx;
            v_top = vld2q_lane_u32(s1 + x1, v_top, 0);
            v_bot = vld2q_lane_u32(s2 + x1, v_bot, 0);
            x1 = (fx >> 16);
            fx += fdx;
            v_top = vld2q_lane_u32(s1 + x1, v_top, 1);
            v_bot = vld2q_lane_u32(s2 + x1, v_bot, 1);
            x1 = (fx >> 16);
            fx += fdx;
            v_top = vld2q_lane_u32(s1 + x1, v_top, 2);
            v_bot = vld2q_lane_u32(s2 + x1, v_bot, 2);
            x1 = (fx >> 16);
            fx += fdx;
            v_top = vld2q_lane_u32(s1 + x1, v_top, 3);
            v_bot = vld2q_lane_u32(s2 + x1, v_bot, 3);

            // Rounded 4-bit horizontal weight, duplicated into both 16-bit halves.
            int32x4_t v_distx = vshrq_n_s32(vaddq_s32(vandq_s32(v_fx, v_ffff_mask), v_fx_r), 12);
            v_distx = vorrq_s32(v_distx, vshlq_n_s32(v_distx, 16));

            interpolate_4_pixels_16_neon(
                        vreinterpretq_s16_u32(v_top.val[0]), vreinterpretq_s16_u32(v_top.val[1]),
                    vreinterpretq_s16_u32(v_bot.val[0]), vreinterpretq_s16_u32(v_bot.val[1]),
                    vreinterpretq_s16_s32(v_distx), v_disty, v_disty_,
                    colorMask, invColorMask, v_256, b);
            b+=4;
            v_fx = vaddq_s32(v_fx, v_fdx);
        }
#endif
        // Scalar remainder of the unchecked middle part.
        while (b < boundedEnd) {
            int x = (fx >> 16);
            if (hasFastInterpolate4()) {
                int distx8 = (fx & 0x0000ffff) >> 8;
                *b = interpolate_4_pixels(s1 + x, s2 + x, distx8, disty8);
            } else {
                int distx4 = ((fx & 0x0000ffff) + 0x0800) >> 12;
                *b = interpolate_4_pixels_16(s1[x], s1[x + 1], s2[x], s2[x + 1], distx4, disty4);
            }
            fx += fdx;
            ++b;
        }
    }

    // Phase 3 (and the only phase for tiled): bounds handled for every pixel.
    while (b < end) {
        int x1 = (fx >> 16);
        int x2;
        fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
        uint tl = s1[x1];
        uint tr = s1[x2];
        uint bl = s2[x1];
        uint br = s2[x2];
        if (hasFastInterpolate4()) {
            int distx8 = (fx & 0x0000ffff) >> 8;
            *b = interpolate_4_pixels(tl, tr, bl, br, distx8, disty8);
        } else {
            int distx4 = ((fx & 0x0000ffff) + 0x0800) >> 12;
            *b = interpolate_4_pixels_16(tl, tr, bl, br, distx4, disty4);
        }
        fx += fdx;
        ++b;
    }
}
2772 
2773 template<TextureBlendType blendType>
fetchTransformedBilinearARGB32PM_rotate_helper(uint * b,uint * end,const QTextureData & image,int & fx,int & fy,int fdx,int fdy)2774 static void QT_FASTCALL fetchTransformedBilinearARGB32PM_rotate_helper(uint *b, uint *end, const QTextureData &image,
2775                                                                        int &fx, int &fy, int fdx, int fdy)
2776 {
2777     // if we are zooming more than 8 times, we use 8bit precision for the position.
2778     while (b < end) {
2779         int x1 = (fx >> 16);
2780         int x2;
2781         int y1 = (fy >> 16);
2782         int y2;
2783 
2784         fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
2785         fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
2786 
2787         const uint *s1 = (const uint *)image.scanLine(y1);
2788         const uint *s2 = (const uint *)image.scanLine(y2);
2789 
2790         uint tl = s1[x1];
2791         uint tr = s1[x2];
2792         uint bl = s2[x1];
2793         uint br = s2[x2];
2794 
2795         int distx = (fx & 0x0000ffff) >> 8;
2796         int disty = (fy & 0x0000ffff) >> 8;
2797 
2798         *b = interpolate_4_pixels(tl, tr, bl, br, distx, disty);
2799 
2800         fx += fdx;
2801         fy += fdy;
2802         ++b;
2803     }
2804 }
2805 
// Bilinear fetch for transforms that also step in y (rotation or shear), using
// 4-bit interpolation weights in the SIMD code and whenever
// hasFastInterpolate4() is false.
//
// For the non-tiled blend type the span is processed in phases:
//   1. leading pixels for which pixelBounds collapsed the column pair or the
//      row pair, handled with per-pixel bounds,
//   2. a middle part without bounds checks, four pixels at a time with
//      SSE2/NEON where available, then one at a time,
//   3. the remaining pixels with per-pixel bounds handling.
// The tiled blend type only uses phase 3.
// fx and fy are left at the position following the last pixel written.
template<TextureBlendType blendType>
static void QT_FASTCALL fetchTransformedBilinearARGB32PM_fast_rotate_helper(uint *b, uint *end, const QTextureData &image,
                                                                            int &fx, int &fy, int fdx, int fdy)
{
    //we are zooming less than 8x, use 4bit precision
    if (blendType != BlendTransformedBilinearTiled) {
        // Fixed-point limits of the area in which (x, y), (x + 1, y), (x, y + 1)
        // and (x + 1, y + 1) are all valid source pixels.
        const qint64 min_fx = qint64(image.x1) * fixed_scale;
        const qint64 max_fx = qint64(image.x2 - 1) * fixed_scale;
        const qint64 min_fy = qint64(image.y1) * fixed_scale;
        const qint64 max_fy = qint64(image.y2 - 1) * fixed_scale;
        // first handle the possibly bounded part in the beginning
        while (b < end) {
            int x1 = (fx >> 16);
            int x2;
            int y1 = (fy >> 16);
            int y2;
            fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
            fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
            // Leave this loop once both the columns and the rows are distinct.
            if (x1 != x2 && y1 != y2)
                break;
            const uint *s1 = (const uint *)image.scanLine(y1);
            const uint *s2 = (const uint *)image.scanLine(y2);
            uint tl = s1[x1];
            uint tr = s1[x2];
            uint bl = s2[x1];
            uint br = s2[x2];
            if (hasFastInterpolate4()) {
                int distx = (fx & 0x0000ffff) >> 8;
                int disty = (fy & 0x0000ffff) >> 8;
                *b = interpolate_4_pixels(tl, tr, bl, br, distx, disty);
            } else {
                int distx = ((fx & 0x0000ffff) + 0x0800) >> 12;
                int disty = ((fy & 0x0000ffff) + 0x0800) >> 12;
                *b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
            }
            fx += fdx;
            fy += fdy;
            ++b;
        }
        // Limit the unchecked part to the steps that keep both fx and fy
        // within the limits computed above.
        uint *boundedEnd = end;
        if (fdx > 0)
            boundedEnd = qMin(boundedEnd, b + (max_fx - fx) / fdx);
        else if (fdx < 0)
            boundedEnd = qMin(boundedEnd, b + (min_fx - fx) / fdx);
        if (fdy > 0)
            boundedEnd = qMin(boundedEnd, b + (max_fy - fy) / fdy);
        else if (fdy < 0)
            boundedEnd = qMin(boundedEnd, b + (min_fy - fy) / fdy);

        // until boundedEnd we can now have a fast middle part without boundary checks
#if defined(__SSE2__)
        const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
        const __m128i v_256 = _mm_set1_epi16(256);
        const __m128i v_fdx = _mm_set1_epi32(fdx*4);
        const __m128i v_fdy = _mm_set1_epi32(fdy*4);
        const __m128i v_fxy_r = _mm_set1_epi32(0x8);
        // Source positions of four consecutive destination pixels.
        __m128i v_fx = _mm_setr_epi32(fx, fx + fdx, fx + fdx + fdx, fx + fdx + fdx + fdx);
        __m128i v_fy = _mm_setr_epi32(fy, fy + fdy, fy + fdy + fdy, fy + fdy + fdy + fdy);

        const uchar *textureData = image.imageData;
        const qsizetype bytesPerLine = image.bytesPerLine;
        // Line stride in pixels, replicated into the four low 16-bit elements.
        const __m128i vbpl = _mm_shufflelo_epi16(_mm_cvtsi32_si128(bytesPerLine/4), _MM_SHUFFLE(0, 0, 0, 0));

        while (b < boundedEnd - 3) {
            // Integer rows of the four positions, packed to 16 bit.
            const __m128i vy = _mm_packs_epi32(_mm_srli_epi32(v_fy, 16), _mm_setzero_si128());
            // 4x16bit * 4x16bit -> 4x32bit
            __m128i offset = _mm_unpacklo_epi16(_mm_mullo_epi16(vy, vbpl), _mm_mulhi_epi16(vy, vbpl));
            // offset = row * (pixels per line) + column, as a pixel index from textureData.
            offset = _mm_add_epi32(offset, _mm_srli_epi32(v_fx, 16));
            const int offset0 = _mm_cvtsi128_si32(offset); offset = _mm_srli_si128(offset, 4);
            const int offset1 = _mm_cvtsi128_si32(offset); offset = _mm_srli_si128(offset, 4);
            const int offset2 = _mm_cvtsi128_si32(offset); offset = _mm_srli_si128(offset, 4);
            const int offset3 = _mm_cvtsi128_si32(offset);
            const uint *topData = (const uint *)(textureData);
            const __m128i tl = _mm_setr_epi32(topData[offset0], topData[offset1], topData[offset2], topData[offset3]);
            const __m128i tr = _mm_setr_epi32(topData[offset0 + 1], topData[offset1 + 1], topData[offset2 + 1], topData[offset3 + 1]);
            // Same indices, one scanline further down.
            const uint *bottomData = (const uint *)(textureData + bytesPerLine);
            const __m128i bl = _mm_setr_epi32(bottomData[offset0], bottomData[offset1], bottomData[offset2], bottomData[offset3]);
            const __m128i br = _mm_setr_epi32(bottomData[offset0 + 1], bottomData[offset1 + 1], bottomData[offset2 + 1], bottomData[offset3 + 1]);

            // Rounded 4-bit weights from the fractions, duplicated into both
            // 16-bit halves of each 32-bit lane.
            __m128i v_distx = _mm_srli_epi16(v_fx, 8);
            __m128i v_disty = _mm_srli_epi16(v_fy, 8);
            v_distx = _mm_srli_epi16(_mm_add_epi32(v_distx, v_fxy_r), 4);
            v_disty = _mm_srli_epi16(_mm_add_epi32(v_disty, v_fxy_r), 4);
            v_distx = _mm_shufflehi_epi16(v_distx, _MM_SHUFFLE(2,2,0,0));
            v_distx = _mm_shufflelo_epi16(v_distx, _MM_SHUFFLE(2,2,0,0));
            v_disty = _mm_shufflehi_epi16(v_disty, _MM_SHUFFLE(2,2,0,0));
            v_disty = _mm_shufflelo_epi16(v_disty, _MM_SHUFFLE(2,2,0,0));

            interpolate_4_pixels_16_sse2(tl, tr, bl, br, v_distx, v_disty, colorMask, v_256, b);
            b += 4;
            v_fx = _mm_add_epi32(v_fx, v_fdx);
            v_fy = _mm_add_epi32(v_fy, v_fdy);
        }
        // Hand the position of the next pixel back to the scalar code.
        fx = _mm_cvtsi128_si32(v_fx);
        fy = _mm_cvtsi128_si32(v_fy);
#elif defined(__ARM_NEON__)
        const int16x8_t colorMask = vdupq_n_s16(0x00ff);
        const int16x8_t invColorMask = vmvnq_s16(colorMask);
        const int16x8_t v_256 = vdupq_n_s16(256);
        int32x4_t v_fdx = vdupq_n_s32(fdx * 4);
        int32x4_t v_fdy = vdupq_n_s32(fdy * 4);

        const uchar *textureData = image.imageData;
        const int bytesPerLine = image.bytesPerLine;

        // Source positions of four consecutive destination pixels; kept in step
        // with the scalar fx/fy, which are advanced while loading below.
        int32x4_t v_fx = vmovq_n_s32(fx);
        int32x4_t v_fy = vmovq_n_s32(fy);
        v_fx = vsetq_lane_s32(fx + fdx, v_fx, 1);
        v_fy = vsetq_lane_s32(fy + fdy, v_fy, 1);
        v_fx = vsetq_lane_s32(fx + fdx * 2, v_fx, 2);
        v_fy = vsetq_lane_s32(fy + fdy * 2, v_fy, 2);
        v_fx = vsetq_lane_s32(fx + fdx * 3, v_fx, 3);
        v_fy = vsetq_lane_s32(fy + fdy * 3, v_fy, 3);

        const int32x4_t v_ffff_mask = vdupq_n_s32(0x0000ffff);
        const int32x4_t v_round = vdupq_n_s32(0x0800);

        while (b < boundedEnd - 3) {
            // NOTE(review): declared without initialiser; every lane is loaded
            // below before the vectors are used.
            uint32x4x2_t v_top, v_bot;

            // For each of the four pixels: locate its row pair, then load the
            // pixel at x1 and its right neighbour into one lane of val[0]/val[1].
            int x1 = (fx >> 16);
            int y1 = (fy >> 16);
            fx += fdx; fy += fdy;
            const uchar *sl = textureData + bytesPerLine * y1;
            const uint *s1 = reinterpret_cast<const uint *>(sl);
            const uint *s2 = reinterpret_cast<const uint *>(sl + bytesPerLine);
            v_top = vld2q_lane_u32(s1 + x1, v_top, 0);
            v_bot = vld2q_lane_u32(s2 + x1, v_bot, 0);
            x1 = (fx >> 16);
            y1 = (fy >> 16);
            fx += fdx; fy += fdy;
            sl = textureData + bytesPerLine * y1;
            s1 = reinterpret_cast<const uint *>(sl);
            s2 = reinterpret_cast<const uint *>(sl + bytesPerLine);
            v_top = vld2q_lane_u32(s1 + x1, v_top, 1);
            v_bot = vld2q_lane_u32(s2 + x1, v_bot, 1);
            x1 = (fx >> 16);
            y1 = (fy >> 16);
            fx += fdx; fy += fdy;
            sl = textureData + bytesPerLine * y1;
            s1 = reinterpret_cast<const uint *>(sl);
            s2 = reinterpret_cast<const uint *>(sl + bytesPerLine);
            v_top = vld2q_lane_u32(s1 + x1, v_top, 2);
            v_bot = vld2q_lane_u32(s2 + x1, v_bot, 2);
            x1 = (fx >> 16);
            y1 = (fy >> 16);
            fx += fdx; fy += fdy;
            sl = textureData + bytesPerLine * y1;
            s1 = reinterpret_cast<const uint *>(sl);
            s2 = reinterpret_cast<const uint *>(sl + bytesPerLine);
            v_top = vld2q_lane_u32(s1 + x1, v_top, 3);
            v_bot = vld2q_lane_u32(s2 + x1, v_bot, 3);

            // Rounded 4-bit weights, duplicated into both 16-bit halves.
            int32x4_t v_distx = vshrq_n_s32(vaddq_s32(vandq_s32(v_fx, v_ffff_mask), v_round), 12);
            int32x4_t v_disty = vshrq_n_s32(vaddq_s32(vandq_s32(v_fy, v_ffff_mask), v_round), 12);
            v_distx = vorrq_s32(v_distx, vshlq_n_s32(v_distx, 16));
            v_disty = vorrq_s32(v_disty, vshlq_n_s32(v_disty, 16));
            int16x8_t v_disty_ = vshlq_n_s16(vreinterpretq_s16_s32(v_disty), 4);

            interpolate_4_pixels_16_neon(
                        vreinterpretq_s16_u32(v_top.val[0]), vreinterpretq_s16_u32(v_top.val[1]),
                        vreinterpretq_s16_u32(v_bot.val[0]), vreinterpretq_s16_u32(v_bot.val[1]),
                        vreinterpretq_s16_s32(v_distx), vreinterpretq_s16_s32(v_disty),
                        v_disty_, colorMask, invColorMask, v_256, b);
            b += 4;
            v_fx = vaddq_s32(v_fx, v_fdx);
            v_fy = vaddq_s32(v_fy, v_fdy);
        }
#endif
        // Scalar remainder of the unchecked middle part.
        while (b < boundedEnd) {
            int x = (fx >> 16);
            int y = (fy >> 16);

            const uint *s1 = (const uint *)image.scanLine(y);
            const uint *s2 = (const uint *)image.scanLine(y + 1);

            if (hasFastInterpolate4()) {
                int distx = (fx & 0x0000ffff) >> 8;
                int disty = (fy & 0x0000ffff) >> 8;
                *b = interpolate_4_pixels(s1 + x, s2 + x, distx, disty);
            } else {
                int distx = ((fx & 0x0000ffff) + 0x0800) >> 12;
                int disty = ((fy & 0x0000ffff) + 0x0800) >> 12;
                *b = interpolate_4_pixels_16(s1[x], s1[x + 1], s2[x], s2[x + 1], distx, disty);
            }

            fx += fdx;
            fy += fdy;
            ++b;
        }
    }

    // Phase 3 (and the only phase for tiled): bounds handled for every pixel.
    while (b < end) {
        int x1 = (fx >> 16);
        int x2;
        int y1 = (fy >> 16);
        int y2;

        fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
        fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);

        const uint *s1 = (const uint *)image.scanLine(y1);
        const uint *s2 = (const uint *)image.scanLine(y2);

        uint tl = s1[x1];
        uint tr = s1[x2];
        uint bl = s2[x1];
        uint br = s2[x2];

        if (hasFastInterpolate4()) {
            int distx = (fx & 0x0000ffff) >> 8;
            int disty = (fy & 0x0000ffff) >> 8;
            *b = interpolate_4_pixels(tl, tr, bl, br, distx, disty);
        } else {
            int distx = ((fx & 0x0000ffff) + 0x0800) >> 12;
            int disty = ((fy & 0x0000ffff) + 0x0800) >> 12;
            *b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
        }

        fx += fdx;
        fy += fdy;
        ++b;
    }
}
3030 
3031 
// Dispatch table of the ARGB32PM fast-path helpers, indexed as
// [tiled][FastTransformTypes]: row 0 holds the BlendTransformedBilinear
// instantiations, row 1 the BlendTransformedBilinearTiled ones. The entries of
// each row must stay in the order of the FastTransformTypes enumerators.
static BilinearFastTransformHelper bilinearFastTransformHelperARGB32PM[2][NFastTransformTypes] = {
    {
        fetchTransformedBilinearARGB32PM_simple_scale_helper<BlendTransformedBilinear>,
        fetchTransformedBilinearARGB32PM_upscale_helper<BlendTransformedBilinear>,
        fetchTransformedBilinearARGB32PM_downscale_helper<BlendTransformedBilinear>,
        fetchTransformedBilinearARGB32PM_rotate_helper<BlendTransformedBilinear>,
        fetchTransformedBilinearARGB32PM_fast_rotate_helper<BlendTransformedBilinear>
    },
    {
        fetchTransformedBilinearARGB32PM_simple_scale_helper<BlendTransformedBilinearTiled>,
        fetchTransformedBilinearARGB32PM_upscale_helper<BlendTransformedBilinearTiled>,
        fetchTransformedBilinearARGB32PM_downscale_helper<BlendTransformedBilinearTiled>,
        fetchTransformedBilinearARGB32PM_rotate_helper<BlendTransformedBilinearTiled>,
        fetchTransformedBilinearARGB32PM_fast_rotate_helper<BlendTransformedBilinearTiled>
    }
};
3048 
3049 template<TextureBlendType blendType> /* blendType = BlendTransformedBilinear or BlendTransformedBilinearTiled */
fetchTransformedBilinearARGB32PM(uint * buffer,const Operator *,const QSpanData * data,int y,int x,int length)3050 static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, const Operator *,
3051                                                                  const QSpanData *data, int y, int x,
3052                                                                  int length)
3053 {
3054     const qreal cx = x + qreal(0.5);
3055     const qreal cy = y + qreal(0.5);
3056     Q_CONSTEXPR int tiled = (blendType == BlendTransformedBilinearTiled) ? 1 : 0;
3057 
3058     uint *end = buffer + length;
3059     uint *b = buffer;
3060     if (canUseFastMatrixPath(cx, cy, length, data)) {
3061         // The increment pr x in the scanline
3062         int fdx = (int)(data->m11 * fixed_scale);
3063         int fdy = (int)(data->m12 * fixed_scale);
3064 
3065         int fx = int((data->m21 * cy
3066                       + data->m11 * cx + data->dx) * fixed_scale);
3067         int fy = int((data->m22 * cy
3068                       + data->m12 * cx + data->dy) * fixed_scale);
3069 
3070         fx -= half_point;
3071         fy -= half_point;
3072 
3073         if (fdy == 0) { // simple scale, no rotation or shear
3074             if (qAbs(fdx) <= fixed_scale) {
3075                 // simple scale up on X
3076                 bilinearFastTransformHelperARGB32PM[tiled][SimpleScaleTransform](b, end, data->texture, fx, fy, fdx, fdy);
3077             } else if (qAbs(fdx) <= 2 * fixed_scale) {
3078                 // simple scale down on X, less than 2x
3079                 const int mid = (length * 2 < BufferSize) ? length : ((length + 1) / 2);
3080                 bilinearFastTransformHelperARGB32PM[tiled][SimpleScaleTransform](buffer, buffer + mid, data->texture, fx, fy, fdx, fdy);
3081                 if (mid != length)
3082                     bilinearFastTransformHelperARGB32PM[tiled][SimpleScaleTransform](buffer + mid, buffer + length, data->texture, fx, fy, fdx, fdy);
3083             } else if (qAbs(data->m22) < qreal(1./8.)) {
3084                 // scale up more than 8x (on Y)
3085                 bilinearFastTransformHelperARGB32PM[tiled][UpscaleTransform](b, end, data->texture, fx, fy, fdx, fdy);
3086             } else {
3087                 // scale down on X
3088                 bilinearFastTransformHelperARGB32PM[tiled][DownscaleTransform](b, end, data->texture, fx, fy, fdx, fdy);
3089             }
3090         } else { // rotation or shear
3091             if (qAbs(data->m11) < qreal(1./8.) || qAbs(data->m22) < qreal(1./8.) ) {
3092                 // if we are zooming more than 8 times, we use 8bit precision for the position.
3093                 bilinearFastTransformHelperARGB32PM[tiled][RotateTransform](b, end, data->texture, fx, fy, fdx, fdy);
3094             } else {
3095                 // we are zooming less than 8x, use 4bit precision
3096                 bilinearFastTransformHelperARGB32PM[tiled][FastRotateTransform](b, end, data->texture, fx, fy, fdx, fdy);
3097             }
3098         }
3099     } else {
3100         const QTextureData &image = data->texture;
3101 
3102         const qreal fdx = data->m11;
3103         const qreal fdy = data->m12;
3104         const qreal fdw = data->m13;
3105 
3106         qreal fx = data->m21 * cy + data->m11 * cx + data->dx;
3107         qreal fy = data->m22 * cy + data->m12 * cx + data->dy;
3108         qreal fw = data->m23 * cy + data->m13 * cx + data->m33;
3109 
3110         while (b < end) {
3111             const qreal iw = fw == 0 ? 1 : 1 / fw;
3112             const qreal px = fx * iw - qreal(0.5);
3113             const qreal py = fy * iw - qreal(0.5);
3114 
3115             int x1 = int(px) - (px < 0);
3116             int x2;
3117             int y1 = int(py) - (py < 0);
3118             int y2;
3119 
3120             int distx = int((px - x1) * 256);
3121             int disty = int((py - y1) * 256);
3122 
3123             fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
3124             fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
3125 
3126             const uint *s1 = (const uint *)data->texture.scanLine(y1);
3127             const uint *s2 = (const uint *)data->texture.scanLine(y2);
3128 
3129             uint tl = s1[x1];
3130             uint tr = s1[x2];
3131             uint bl = s2[x1];
3132             uint br = s2[x2];
3133 
3134             *b = interpolate_4_pixels(tl, tr, bl, br, distx, disty);
3135 
3136             fx += fdx;
3137             fy += fdy;
3138             fw += fdw;
3139             //force increment to avoid /0
3140             if (!fw) {
3141                 fw += fdw;
3142             }
3143             ++b;
3144         }
3145     }
3146 
3147     return buffer;
3148 }
3149 
3150 template<TextureBlendType blendType>
fetchTransformedBilinear_simple_scale_helper(uint * b,uint * end,const QTextureData & image,int & fx,int & fy,int fdx,int)3151 static void QT_FASTCALL fetchTransformedBilinear_simple_scale_helper(uint *b, uint *end, const QTextureData &image,
3152                                                                      int &fx, int &fy, int fdx, int /*fdy*/)
3153 {
3154     const QPixelLayout *layout = &qPixelLayouts[image.format];
3155     const QVector<QRgb> *clut = image.colorTable;
3156     const FetchAndConvertPixelsFunc fetch = layout->fetchToARGB32PM;
3157 
3158     int y1 = (fy >> 16);
3159     int y2;
3160     fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
3161     const uchar *s1 = image.scanLine(y1);
3162     const uchar *s2 = image.scanLine(y2);
3163 
3164     const int disty = (fy & 0x0000ffff) >> 8;
3165     const int idisty = 256 - disty;
3166     const int length = end - b;
3167 
3168     // The intermediate buffer is generated in the positive direction
3169     const int adjust = (fdx < 0) ? fdx * length : 0;
3170     const int offset = (fx + adjust) >> 16;
3171     int x = offset;
3172 
3173     IntermediateBuffer intermediate;
3174     uint *buf1 = intermediate.buffer_rb;
3175     uint *buf2 = intermediate.buffer_ag;
3176     const uint *ptr1;
3177     const uint *ptr2;
3178 
3179     int count = (qint64(length) * qAbs(fdx) + fixed_scale - 1) / fixed_scale + 2;
3180     Q_ASSERT(count <= BufferSize + 2);
3181 
3182     if (blendType == BlendTransformedBilinearTiled) {
3183         x %= image.width;
3184         if (x < 0)
3185             x += image.width;
3186         int len1 = qMin(count, image.width - x);
3187         int len2 = qMin(x, count - len1);
3188 
3189         ptr1 = fetch(buf1, s1, x, len1, clut, nullptr);
3190         ptr2 = fetch(buf2, s2, x, len1, clut, nullptr);
3191         for (int i = 0; i < len1; ++i) {
3192             uint t = ptr1[i];
3193             uint b = ptr2[i];
3194             buf1[i] = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff;
3195             buf2[i] = ((((t >> 8) & 0xff00ff) * idisty + ((b >> 8) & 0xff00ff) * disty) >> 8) & 0xff00ff;
3196         }
3197 
3198         if (len2) {
3199             ptr1 = fetch(buf1 + len1, s1, 0, len2, clut, nullptr);
3200             ptr2 = fetch(buf2 + len1, s2, 0, len2, clut, nullptr);
3201             for (int i = 0; i < len2; ++i) {
3202                 uint t = ptr1[i];
3203                 uint b = ptr2[i];
3204                 buf1[i + len1] = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff;
3205                 buf2[i + len1] = ((((t >> 8) & 0xff00ff) * idisty + ((b >> 8) & 0xff00ff) * disty) >> 8) & 0xff00ff;
3206             }
3207         }
3208         // Generate the rest by repeatedly repeating the previous set of pixels
3209         for (int i = image.width; i < count; ++i) {
3210             buf1[i] = buf1[i - image.width];
3211             buf2[i] = buf2[i - image.width];
3212         }
3213     } else {
3214         int start = qMax(x, image.x1);
3215         int end = qMin(x + count, image.x2);
3216         int len = qMax(1, end - start);
3217         int leading = start - x;
3218 
3219         ptr1 = fetch(buf1 + leading, s1, start, len, clut, nullptr);
3220         ptr2 = fetch(buf2 + leading, s2, start, len, clut, nullptr);
3221 
3222         for (int i = 0; i < len; ++i) {
3223             uint t = ptr1[i];
3224             uint b = ptr2[i];
3225             buf1[i + leading] = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff;
3226             buf2[i + leading] = ((((t >> 8) & 0xff00ff) * idisty + ((b >> 8) & 0xff00ff) * disty) >> 8) & 0xff00ff;
3227         }
3228 
3229         for (int i = 0; i < leading; ++i) {
3230             buf1[i] = buf1[leading];
3231             buf2[i] = buf2[leading];
3232         }
3233         for (int i = leading + len; i < count; ++i) {
3234             buf1[i] = buf1[i - 1];
3235             buf2[i] = buf2[i - 1];
3236         }
3237     }
3238 
3239     // Now interpolate the values from the intermediate.buffer to get the final result.
3240     intermediate_adder(b, end, intermediate, offset, fx, fdx);
3241 }
3242 
3243 
/*
    Gathers the unconverted source pixels needed to bilinearly interpolate len
    destination pixels.

    For destination pixel i, buf1[2*i] and buf1[2*i + 1] receive the two samples
    taken from the first scanline, and buf2[2*i] and buf2[2*i + 1] the two samples
    from the second scanline. fx/fy are the source position, whose integer part is
    obtained with ">> 16"; they are advanced by fdx/fdy after every pixel. Both are
    taken by value, so the caller's copies are left untouched.

    When bpp is below BPP32 the pixels are read through a FetchPixelFunc and T has to
    be uint-sized; otherwise the scanline is indexed directly as an array of T.
*/
template<TextureBlendType blendType, QPixelLayout::BPP bpp, typename T>
static void QT_FASTCALL fetchTransformedBilinear_fetcher(T *buf1, T *buf2, const int len, const QTextureData &image,
                                                         int fx, int fy, const int fdx, const int fdy)
{
    const QPixelLayout &layout = qPixelLayouts[image.format];
    constexpr bool useFetch = (bpp < QPixelLayout::BPP32);
    if (useFetch)
        Q_ASSERT(sizeof(T) == sizeof(uint));
    else
        Q_ASSERT(layout.bpp == bpp);
    // With BPPNone the fetch function is looked up from the layout at run time,
    // otherwise the templated fetchPixel<bpp> is used.
    const FetchPixelFunc fetch1 = (bpp == QPixelLayout::BPPNone) ? qFetchPixel[layout.bpp] : fetchPixel<bpp>;
    if (fdy == 0) {
        // No vertical movement: the two scanlines are resolved once for the whole run.
        int y1 = (fy >> 16);
        int y2;
        fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
        const uchar *s1 = image.scanLine(y1);
        const uchar *s2 = image.scanLine(y2);

        int i = 0;
        if (blendType == BlendTransformedBilinear) {
            // Phase 1: leading pixels for which the bounds helper yields x1 == x2;
            // a single fetch per scanline is stored into both slots.
            for (; i < len; ++i) {
                int x1 = (fx >> 16);
                int x2;
                fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
                if (x1 != x2)
                    break;
                if (useFetch) {
                    buf1[i * 2 + 0] = buf1[i * 2 + 1] = fetch1(s1, x1);
                    buf2[i * 2 + 0] = buf2[i * 2 + 1] = fetch1(s2, x1);
                } else {
                    buf1[i * 2 + 0] = buf1[i * 2 + 1] = reinterpret_cast<const T *>(s1)[x1];
                    buf2[i * 2 + 0] = buf2[i * 2 + 1] = reinterpret_cast<const T *>(s2)[x1];
                }
                fx += fdx;
            }
            // Phase 2 limit: number of steps from the current fx until it reaches
            // image.x2 - 1 (moving right) or image.x1 (moving left), capped at len.
            // The value is used as an upper bound for i itself.
            int fastLen = len;
            if (fdx > 0)
                fastLen = qMin(fastLen, int((qint64(image.x2 - 1) * fixed_scale - fx) / fdx));
            else if (fdx < 0)
                fastLen = qMin(fastLen, int((qint64(image.x1) * fixed_scale - fx) / fdx));

            // Phase 2: x and x + 1 are read directly, without calling the bounds helper.
            for (; i < fastLen; ++i) {
                int x = (fx >> 16);
                if (useFetch) {
                    buf1[i * 2 + 0] = fetch1(s1, x);
                    buf1[i * 2 + 1] = fetch1(s1, x + 1);
                    buf2[i * 2 + 0] = fetch1(s2, x);
                    buf2[i * 2 + 1] = fetch1(s2, x + 1);
                } else {
                    buf1[i * 2 + 0] = reinterpret_cast<const T *>(s1)[x];
                    buf1[i * 2 + 1] = reinterpret_cast<const T *>(s1)[x + 1];
                    buf2[i * 2 + 0] = reinterpret_cast<const T *>(s2)[x];
                    buf2[i * 2 + 1] = reinterpret_cast<const T *>(s2)[x + 1];
                }
                fx += fdx;
            }
        }

        // Phase 3 (and the only phase for the tiled blend type): every remaining
        // pixel goes through the bounds helper.
        for (; i < len; ++i) {
            int x1 = (fx >> 16);
            int x2;
            fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
            if (useFetch) {
                buf1[i * 2 + 0] = fetch1(s1, x1);
                buf1[i * 2 + 1] = fetch1(s1, x2);
                buf2[i * 2 + 0] = fetch1(s2, x1);
                buf2[i * 2 + 1] = fetch1(s2, x2);
            } else {
                buf1[i * 2 + 0] = reinterpret_cast<const T *>(s1)[x1];
                buf1[i * 2 + 1] = reinterpret_cast<const T *>(s1)[x2];
                buf2[i * 2 + 0] = reinterpret_cast<const T *>(s2)[x1];
                buf2[i * 2 + 1] = reinterpret_cast<const T *>(s2)[x2];
            }
            fx += fdx;
        }
    } else {
        // Both coordinates change per pixel, so the scanlines are looked up per pixel.
        int i = 0;
        if (blendType == BlendTransformedBilinear) {
            // Phase 1: runs until the bounds helper returns distinct values on
            // both axes (x1 != x2 and y1 != y2).
            for (; i < len; ++i) {
                int x1 = (fx >> 16);
                int x2;
                int y1 = (fy >> 16);
                int y2;
                fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
                fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
                if (x1 != x2 && y1 != y2)
                    break;
                const uchar *s1 = image.scanLine(y1);
                const uchar *s2 = image.scanLine(y2);
                if (useFetch) {
                    buf1[i * 2 + 0] = fetch1(s1, x1);
                    buf1[i * 2 + 1] = fetch1(s1, x2);
                    buf2[i * 2 + 0] = fetch1(s2, x1);
                    buf2[i * 2 + 1] = fetch1(s2, x2);
                } else {
                    buf1[i * 2 + 0] = reinterpret_cast<const T *>(s1)[x1];
                    buf1[i * 2 + 1] = reinterpret_cast<const T *>(s1)[x2];
                    buf2[i * 2 + 0] = reinterpret_cast<const T *>(s2)[x1];
                    buf2[i * 2 + 1] = reinterpret_cast<const T *>(s2)[x2];
                }
                fx += fdx;
                fy += fdy;
            }
            // Phase 2 limit: the smallest step count until fx or fy reaches the
            // corresponding x1/x2 - 1 or y1/y2 - 1 edge, capped at len.
            int fastLen = len;
            if (fdx > 0)
                fastLen = qMin(fastLen, int((qint64(image.x2 - 1) * fixed_scale - fx) / fdx));
            else if (fdx < 0)
                fastLen = qMin(fastLen, int((qint64(image.x1) * fixed_scale - fx) / fdx));
            if (fdy > 0)
                fastLen = qMin(fastLen, int((qint64(image.y2 - 1) * fixed_scale - fy) / fdy));
            else if (fdy < 0)
                fastLen = qMin(fastLen, int((qint64(image.y1) * fixed_scale - fy) / fdy));

            // Phase 2: no bounds helper; the second scanline is the first one
            // advanced by image.bytesPerLine.
            for (; i < fastLen; ++i) {
                int x = (fx >> 16);
                int y = (fy >> 16);
                const uchar *s1 = image.scanLine(y);
                const uchar *s2 = s1 + image.bytesPerLine;
                if (useFetch) {
                    buf1[i * 2 + 0] = fetch1(s1, x);
                    buf1[i * 2 + 1] = fetch1(s1, x + 1);
                    buf2[i * 2 + 0] = fetch1(s2, x);
                    buf2[i * 2 + 1] = fetch1(s2, x + 1);
                } else {
                    buf1[i * 2 + 0] = reinterpret_cast<const T *>(s1)[x];
                    buf1[i * 2 + 1] = reinterpret_cast<const T *>(s1)[x + 1];
                    buf2[i * 2 + 0] = reinterpret_cast<const T *>(s2)[x];
                    buf2[i * 2 + 1] = reinterpret_cast<const T *>(s2)[x + 1];
                }
                fx += fdx;
                fy += fdy;
            }
        }

        // Phase 3 (and the only phase for the tiled blend type): bounds helper on
        // both axes for every remaining pixel.
        for (; i < len; ++i) {
            int x1 = (fx >> 16);
            int x2;
            int y1 = (fy >> 16);
            int y2;
            fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
            fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);

            const uchar *s1 = image.scanLine(y1);
            const uchar *s2 = image.scanLine(y2);
            if (useFetch) {
                buf1[i * 2 + 0] = fetch1(s1, x1);
                buf1[i * 2 + 1] = fetch1(s1, x2);
                buf2[i * 2 + 0] = fetch1(s2, x1);
                buf2[i * 2 + 1] = fetch1(s2, x2);
            } else {
                buf1[i * 2 + 0] = reinterpret_cast<const T *>(s1)[x1];
                buf1[i * 2 + 1] = reinterpret_cast<const T *>(s1)[x2];
                buf2[i * 2 + 0] = reinterpret_cast<const T *>(s2)[x1];
                buf2[i * 2 + 1] = reinterpret_cast<const T *>(s2)[x2];
            }
            fx += fdx;
            fy += fdy;
        }
    }
}
3404 
3405 // blendType = BlendTransformedBilinear or BlendTransformedBilinearTiled
3406 template<TextureBlendType blendType, QPixelLayout::BPP bpp>
fetchTransformedBilinear(uint * buffer,const Operator *,const QSpanData * data,int y,int x,int length)3407 static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Operator *,
3408                                                         const QSpanData *data, int y, int x, int length)
3409 {
3410     const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
3411     const QVector<QRgb> *clut = data->texture.colorTable;
3412     Q_ASSERT(bpp == QPixelLayout::BPPNone || layout->bpp == bpp);
3413 
3414     const qreal cx = x + qreal(0.5);
3415     const qreal cy = y + qreal(0.5);
3416 
3417     if (canUseFastMatrixPath(cx, cy, length, data)) {
3418         // The increment pr x in the scanline
3419         int fdx = (int)(data->m11 * fixed_scale);
3420         int fdy = (int)(data->m12 * fixed_scale);
3421 
3422         int fx = int((data->m21 * cy + data->m11 * cx + data->dx) * fixed_scale);
3423         int fy = int((data->m22 * cy + data->m12 * cx + data->dy) * fixed_scale);
3424 
3425         fx -= half_point;
3426         fy -= half_point;
3427 
3428         if (fdy == 0) { // simple scale, no rotation or shear
3429             if (qAbs(fdx) <= fixed_scale) { // scale up on X
3430                 fetchTransformedBilinear_simple_scale_helper<blendType>(buffer, buffer + length, data->texture, fx, fy, fdx, fdy);
3431             } else if (qAbs(fdx) <= 2 * fixed_scale) { // scale down on X less than 2x
3432                 const int mid = (length * 2 < BufferSize) ? length : ((length + 1) / 2);
3433                 fetchTransformedBilinear_simple_scale_helper<blendType>(buffer, buffer + mid, data->texture, fx, fy, fdx, fdy);
3434                 if (mid != length)
3435                     fetchTransformedBilinear_simple_scale_helper<blendType>(buffer + mid, buffer + length, data->texture, fx, fy, fdx, fdy);
3436             } else {
3437                 const auto fetcher = fetchTransformedBilinear_fetcher<blendType,bpp,uint>;
3438 
3439                 uint buf1[BufferSize];
3440                 uint buf2[BufferSize];
3441                 uint *b = buffer;
3442                 while (length) {
3443                     int len = qMin(length, BufferSize / 2);
3444                     fetcher(buf1, buf2, len, data->texture, fx, fy, fdx, 0);
3445                     layout->convertToARGB32PM(buf1, len * 2, clut);
3446                     layout->convertToARGB32PM(buf2, len * 2, clut);
3447 
3448                     if (hasFastInterpolate4() || qAbs(data->m22) < qreal(1./8.)) { // scale up more than 8x (on Y)
3449                         int disty = (fy & 0x0000ffff) >> 8;
3450                         for (int i = 0; i < len; ++i) {
3451                             int distx = (fx & 0x0000ffff) >> 8;
3452                             b[i] = interpolate_4_pixels(buf1 + i * 2, buf2 + i * 2, distx, disty);
3453                             fx += fdx;
3454                         }
3455                     } else {
3456                         int disty = ((fy & 0x0000ffff) + 0x0800) >> 12;
3457                         for (int i = 0; i < len; ++i) {
3458                             uint tl = buf1[i * 2 + 0];
3459                             uint tr = buf1[i * 2 + 1];
3460                             uint bl = buf2[i * 2 + 0];
3461                             uint br = buf2[i * 2 + 1];
3462                             int distx = ((fx & 0x0000ffff) + 0x0800) >> 12;
3463                             b[i] = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
3464                             fx += fdx;
3465                         }
3466                     }
3467                     length -= len;
3468                     b += len;
3469                 }
3470             }
3471         } else { // rotation or shear
3472             const auto fetcher = fetchTransformedBilinear_fetcher<blendType,bpp,uint>;
3473 
3474             uint buf1[BufferSize];
3475             uint buf2[BufferSize];
3476             uint *b = buffer;
3477             while (length) {
3478                 int len = qMin(length, BufferSize / 2);
3479                 fetcher(buf1, buf2, len, data->texture, fx, fy, fdx, fdy);
3480                 layout->convertToARGB32PM(buf1, len * 2, clut);
3481                 layout->convertToARGB32PM(buf2, len * 2, clut);
3482 
3483                 if (hasFastInterpolate4() || qAbs(data->m11) < qreal(1./8.) || qAbs(data->m22) < qreal(1./8.)) {
3484                     // If we are zooming more than 8 times, we use 8bit precision for the position.
3485                     for (int i = 0; i < len; ++i) {
3486                         int distx = (fx & 0x0000ffff) >> 8;
3487                         int disty = (fy & 0x0000ffff) >> 8;
3488 
3489                         b[i] = interpolate_4_pixels(buf1 + i * 2, buf2 + i * 2, distx, disty);
3490                         fx += fdx;
3491                         fy += fdy;
3492                     }
3493                 } else {
3494                     // We are zooming less than 8x, use 4bit precision
3495                     for (int i = 0; i < len; ++i) {
3496                         uint tl = buf1[i * 2 + 0];
3497                         uint tr = buf1[i * 2 + 1];
3498                         uint bl = buf2[i * 2 + 0];
3499                         uint br = buf2[i * 2 + 1];
3500 
3501                         int distx = ((fx & 0x0000ffff) + 0x0800) >> 12;
3502                         int disty = ((fy & 0x0000ffff) + 0x0800) >> 12;
3503 
3504                         b[i] = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
3505                         fx += fdx;
3506                         fy += fdy;
3507                     }
3508                 }
3509 
3510                 length -= len;
3511                 b += len;
3512             }
3513         }
3514     } else {
3515         // When templated 'fetch' should be inlined at compile time:
3516         const FetchPixelFunc fetch1 = (bpp == QPixelLayout::BPPNone) ? qFetchPixel[layout->bpp] : fetchPixel<bpp>;
3517 
3518         const QTextureData &image = data->texture;
3519 
3520         const qreal fdx = data->m11;
3521         const qreal fdy = data->m12;
3522         const qreal fdw = data->m13;
3523 
3524         qreal fx = data->m21 * cy + data->m11 * cx + data->dx;
3525         qreal fy = data->m22 * cy + data->m12 * cx + data->dy;
3526         qreal fw = data->m23 * cy + data->m13 * cx + data->m33;
3527 
3528         uint buf1[BufferSize];
3529         uint buf2[BufferSize];
3530         uint *b = buffer;
3531 
3532         int distxs[BufferSize / 2];
3533         int distys[BufferSize / 2];
3534 
3535         while (length) {
3536             int len = qMin(length, BufferSize / 2);
3537             for (int i = 0; i < len; ++i) {
3538                 const qreal iw = fw == 0 ? 1 : 1 / fw;
3539                 const qreal px = fx * iw - qreal(0.5);
3540                 const qreal py = fy * iw - qreal(0.5);
3541 
3542                 int x1 = int(px) - (px < 0);
3543                 int x2;
3544                 int y1 = int(py) - (py < 0);
3545                 int y2;
3546 
3547                 distxs[i] = int((px - x1) * 256);
3548                 distys[i] = int((py - y1) * 256);
3549 
3550                 fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
3551                 fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
3552 
3553                 const uchar *s1 = data->texture.scanLine(y1);
3554                 const uchar *s2 = data->texture.scanLine(y2);
3555                 buf1[i * 2 + 0] = fetch1(s1, x1);
3556                 buf1[i * 2 + 1] = fetch1(s1, x2);
3557                 buf2[i * 2 + 0] = fetch1(s2, x1);
3558                 buf2[i * 2 + 1] = fetch1(s2, x2);
3559 
3560                 fx += fdx;
3561                 fy += fdy;
3562                 fw += fdw;
3563                 //force increment to avoid /0
3564                 if (!fw)
3565                     fw += fdw;
3566             }
3567 
3568             layout->convertToARGB32PM(buf1, len * 2, clut);
3569             layout->convertToARGB32PM(buf2, len * 2, clut);
3570 
3571             for (int i = 0; i < len; ++i) {
3572                 int distx = distxs[i];
3573                 int disty = distys[i];
3574 
3575                 b[i] = interpolate_4_pixels(buf1 + i * 2, buf2 + i * 2, distx, disty);
3576             }
3577             length -= len;
3578             b += len;
3579         }
3580     }
3581 
3582     return buffer;
3583 }
3584 
3585 #if QT_CONFIG(raster_64bit)
3586 template<TextureBlendType blendType>
fetchTransformedBilinear64_uint32(QRgba64 * buffer,const QSpanData * data,int y,int x,int length)3587 static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64_uint32(QRgba64 *buffer, const QSpanData *data,
3588                                                                     int y, int x, int length)
3589 {
3590     const QTextureData &texture = data->texture;
3591     const QPixelLayout *layout = &qPixelLayouts[texture.format];
3592     const QVector<QRgb> *clut = data->texture.colorTable;
3593 
3594     const qreal cx = x + qreal(0.5);
3595     const qreal cy = y + qreal(0.5);
3596 
3597     uint sbuf1[BufferSize];
3598     uint sbuf2[BufferSize];
3599     alignas(8) QRgba64 buf1[BufferSize];
3600     alignas(8) QRgba64 buf2[BufferSize];
3601     QRgba64 *end = buffer + length;
3602     QRgba64 *b = buffer;
3603 
3604     if (canUseFastMatrixPath(cx, cy, length, data)) {
3605         // The increment pr x in the scanline
3606         const int fdx = (int)(data->m11 * fixed_scale);
3607         const int fdy = (int)(data->m12 * fixed_scale);
3608 
3609         int fx = int((data->m21 * cy + data->m11 * cx + data->dx) * fixed_scale);
3610         int fy = int((data->m22 * cy + data->m12 * cx + data->dy) * fixed_scale);
3611 
3612         fx -= half_point;
3613         fy -= half_point;
3614 
3615         const auto fetcher =
3616                 (layout->bpp == QPixelLayout::BPP32)
3617                         ? fetchTransformedBilinear_fetcher<blendType, QPixelLayout::BPP32, uint>
3618                         : fetchTransformedBilinear_fetcher<blendType, QPixelLayout::BPPNone, uint>;
3619 
3620         if (fdy == 0) { //simple scale, no rotation
3621             while (length) {
3622                 int len = qMin(length, BufferSize / 2);
3623                 int disty = (fy & 0x0000ffff);
3624 #if defined(__SSE2__)
3625                 const __m128i vdy = _mm_set1_epi16(disty);
3626                 const __m128i vidy = _mm_set1_epi16(0x10000 - disty);
3627 #endif
3628                 fetcher(sbuf1, sbuf2, len, data->texture, fx, fy, fdx, fdy);
3629 
3630                 layout->convertToRGBA64PM(buf1, sbuf1, len * 2, clut, nullptr);
3631                 if (disty)
3632                     layout->convertToRGBA64PM(buf2, sbuf2, len * 2, clut, nullptr);
3633 
3634                 for (int i = 0; i < len; ++i) {
3635                     int distx = (fx & 0x0000ffff);
3636 #if defined(__SSE2__)
3637                     __m128i vt = _mm_loadu_si128((const __m128i*)(buf1 + i*2));
3638                     if (disty) {
3639                         __m128i vb = _mm_loadu_si128((const __m128i*)(buf2 + i*2));
3640                         vt = _mm_mulhi_epu16(vt, vidy);
3641                         vb = _mm_mulhi_epu16(vb, vdy);
3642                         vt = _mm_add_epi16(vt, vb);
3643                     }
3644                     if (distx) {
3645                         const __m128i vdistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(distx), _MM_SHUFFLE(0, 0, 0, 0));
3646                         const __m128i vidistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(0x10000 - distx), _MM_SHUFFLE(0, 0, 0, 0));
3647                         vt = _mm_mulhi_epu16(vt, _mm_unpacklo_epi64(vidistx, vdistx));
3648                         vt = _mm_add_epi16(vt, _mm_srli_si128(vt, 8));
3649                     }
3650                     _mm_storel_epi64((__m128i*)(b+i), vt);
3651 #else
3652                     b[i] = interpolate_4_pixels_rgb64(buf1 + i*2, buf2 + i*2, distx, disty);
3653 #endif
3654                     fx += fdx;
3655                 }
3656                 length -= len;
3657                 b += len;
3658             }
3659         } else { // rotation or shear
3660             while (b < end) {
3661                 int len = qMin(length, BufferSize / 2);
3662 
3663                 fetcher(sbuf1, sbuf2, len, data->texture, fx, fy, fdx, fdy);
3664 
3665                 layout->convertToRGBA64PM(buf1, sbuf1, len * 2, clut, nullptr);
3666                 layout->convertToRGBA64PM(buf2, sbuf2, len * 2, clut, nullptr);
3667 
3668                 for (int i = 0; i < len; ++i) {
3669                     int distx = (fx & 0x0000ffff);
3670                     int disty = (fy & 0x0000ffff);
3671                     b[i] = interpolate_4_pixels_rgb64(buf1 + i*2, buf2 + i*2, distx, disty);
3672                     fx += fdx;
3673                     fy += fdy;
3674                 }
3675 
3676                 length -= len;
3677                 b += len;
3678             }
3679         }
3680     } else { // !(data->fast_matrix)
3681         const QTextureData &image = data->texture;
3682 
3683         const qreal fdx = data->m11;
3684         const qreal fdy = data->m12;
3685         const qreal fdw = data->m13;
3686 
3687         qreal fx = data->m21 * cy + data->m11 * cx + data->dx;
3688         qreal fy = data->m22 * cy + data->m12 * cx + data->dy;
3689         qreal fw = data->m23 * cy + data->m13 * cx + data->m33;
3690 
3691         FetchPixelFunc fetch = qFetchPixel[layout->bpp];
3692 
3693         int distxs[BufferSize / 2];
3694         int distys[BufferSize / 2];
3695 
3696         while (b < end) {
3697             int len = qMin(length, BufferSize / 2);
3698             for (int i = 0; i < len; ++i) {
3699                 const qreal iw = fw == 0 ? 1 : 1 / fw;
3700                 const qreal px = fx * iw - qreal(0.5);
3701                 const qreal py = fy * iw - qreal(0.5);
3702 
3703                 int x1 = qFloor(px);
3704                 int x2;
3705                 int y1 = qFloor(py);
3706                 int y2;
3707 
3708                 distxs[i] = int((px - x1) * (1<<16));
3709                 distys[i] = int((py - y1) * (1<<16));
3710 
3711                 fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
3712                 fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
3713 
3714                 const uchar *s1 = texture.scanLine(y1);
3715                 const uchar *s2 = texture.scanLine(y2);
3716 
3717                 sbuf1[i * 2 + 0] = fetch(s1, x1);
3718                 sbuf1[i * 2 + 1] = fetch(s1, x2);
3719                 sbuf2[i * 2 + 0] = fetch(s2, x1);
3720                 sbuf2[i * 2 + 1] = fetch(s2, x2);
3721 
3722                 fx += fdx;
3723                 fy += fdy;
3724                 fw += fdw;
3725                 //force increment to avoid /0
3726                 if (!fw)
3727                     fw += fdw;
3728             }
3729 
3730             layout->convertToRGBA64PM(buf1, sbuf1, len * 2, clut, nullptr);
3731             layout->convertToRGBA64PM(buf2, sbuf2, len * 2, clut, nullptr);
3732 
3733             for (int i = 0; i < len; ++i) {
3734                 int distx = distxs[i];
3735                 int disty = distys[i];
3736                 b[i] = interpolate_4_pixels_rgb64(buf1 + i*2, buf2 + i*2, distx, disty);
3737             }
3738 
3739             length -= len;
3740             b += len;
3741         }
3742     }
3743     return buffer;
3744 }
3745 
3746 template<TextureBlendType blendType>
fetchTransformedBilinear64_uint64(QRgba64 * buffer,const QSpanData * data,int y,int x,int length)3747 static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64_uint64(QRgba64 *buffer, const QSpanData *data,
3748                                                                     int y, int x, int length)
3749 {
3750     const QTextureData &texture = data->texture;
3751     Q_ASSERT(qPixelLayouts[texture.format].bpp == QPixelLayout::BPP64);
3752     const auto convert = (data->texture.format == QImage::Format_RGBA64) ? convertRGBA64ToRGBA64PM : convertRGBA64PMToRGBA64PM;
3753 
3754     const qreal cx = x + qreal(0.5);
3755     const qreal cy = y + qreal(0.5);
3756 
3757     alignas(8) QRgba64 buf1[BufferSize];
3758     alignas(8) QRgba64 buf2[BufferSize];
3759     QRgba64 *end = buffer + length;
3760     QRgba64 *b = buffer;
3761 
3762     if (canUseFastMatrixPath(cx, cy, length, data)) {
3763         // The increment pr x in the scanline
3764         const int fdx = (int)(data->m11 * fixed_scale);
3765         const int fdy = (int)(data->m12 * fixed_scale);
3766 
3767         int fx = int((data->m21 * cy + data->m11 * cx + data->dx) * fixed_scale);
3768         int fy = int((data->m22 * cy + data->m12 * cx + data->dy) * fixed_scale);
3769 
3770         fx -= half_point;
3771         fy -= half_point;
3772         const auto fetcher = fetchTransformedBilinear_fetcher<blendType, QPixelLayout::BPP64, QRgba64>;
3773 
3774         if (fdy == 0) { //simple scale, no rotation
3775             while (length) {
3776                 int len = qMin(length, BufferSize / 2);
3777                 int disty = (fy & 0x0000ffff);
3778 #if defined(__SSE2__)
3779                 const __m128i vdy = _mm_set1_epi16(disty);
3780                 const __m128i vidy = _mm_set1_epi16(0x10000 - disty);
3781 #endif
3782                 fetcher(buf1, buf2, len, data->texture, fx, fy, fdx, fdy);
3783 
3784                 convert(buf1, len * 2);
3785                 if (disty)
3786                     convert(buf2, len * 2);
3787 
3788                 for (int i = 0; i < len; ++i) {
3789                     int distx = (fx & 0x0000ffff);
3790 #if defined(__SSE2__)
3791                     __m128i vt = _mm_loadu_si128((const __m128i*)(buf1 + i*2));
3792                     if (disty) {
3793                         __m128i vb = _mm_loadu_si128((const __m128i*)(buf2 + i*2));
3794                         vt = _mm_mulhi_epu16(vt, vidy);
3795                         vb = _mm_mulhi_epu16(vb, vdy);
3796                         vt = _mm_add_epi16(vt, vb);
3797                     }
3798                     if (distx) {
3799                         const __m128i vdistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(distx), _MM_SHUFFLE(0, 0, 0, 0));
3800                         const __m128i vidistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(0x10000 - distx), _MM_SHUFFLE(0, 0, 0, 0));
3801                         vt = _mm_mulhi_epu16(vt, _mm_unpacklo_epi64(vidistx, vdistx));
3802                         vt = _mm_add_epi16(vt, _mm_srli_si128(vt, 8));
3803                     }
3804                     _mm_storel_epi64((__m128i*)(b+i), vt);
3805 #else
3806                     b[i] = interpolate_4_pixels_rgb64(buf1 + i*2, buf2 + i*2, distx, disty);
3807 #endif
3808                     fx += fdx;
3809                 }
3810                 length -= len;
3811                 b += len;
3812             }
3813         } else { // rotation or shear
3814             while (b < end) {
3815                 int len = qMin(length, BufferSize / 2);
3816 
3817                 fetcher(buf1, buf2, len, data->texture, fx, fy, fdx, fdy);
3818 
3819                 convert(buf1, len * 2);
3820                 convert(buf2, len * 2);
3821 
3822                 for (int i = 0; i < len; ++i) {
3823                     int distx = (fx & 0x0000ffff);
3824                     int disty = (fy & 0x0000ffff);
3825                     b[i] = interpolate_4_pixels_rgb64(buf1 + i*2, buf2 + i*2, distx, disty);
3826                     fx += fdx;
3827                     fy += fdy;
3828                 }
3829 
3830                 length -= len;
3831                 b += len;
3832             }
3833         }
3834     } else { // !(data->fast_matrix)
3835         const QTextureData &image = data->texture;
3836 
3837         const qreal fdx = data->m11;
3838         const qreal fdy = data->m12;
3839         const qreal fdw = data->m13;
3840 
3841         qreal fx = data->m21 * cy + data->m11 * cx + data->dx;
3842         qreal fy = data->m22 * cy + data->m12 * cx + data->dy;
3843         qreal fw = data->m23 * cy + data->m13 * cx + data->m33;
3844 
3845         int distxs[BufferSize / 2];
3846         int distys[BufferSize / 2];
3847 
3848         while (b < end) {
3849             int len = qMin(length, BufferSize / 2);
3850             for (int i = 0; i < len; ++i) {
3851                 const qreal iw = fw == 0 ? 1 : 1 / fw;
3852                 const qreal px = fx * iw - qreal(0.5);
3853                 const qreal py = fy * iw - qreal(0.5);
3854 
3855                 int x1 = int(px) - (px < 0);
3856                 int x2;
3857                 int y1 = int(py) - (py < 0);
3858                 int y2;
3859 
3860                 distxs[i] = int((px - x1) * (1<<16));
3861                 distys[i] = int((py - y1) * (1<<16));
3862 
3863                 fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
3864                 fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
3865 
3866                 const uchar *s1 = texture.scanLine(y1);
3867                 const uchar *s2 = texture.scanLine(y2);
3868 
3869                 buf1[i * 2 + 0] = reinterpret_cast<const QRgba64 *>(s1)[x1];
3870                 buf1[i * 2 + 1] = reinterpret_cast<const QRgba64 *>(s1)[x2];
3871                 buf2[i * 2 + 0] = reinterpret_cast<const QRgba64 *>(s2)[x1];
3872                 buf2[i * 2 + 1] = reinterpret_cast<const QRgba64 *>(s2)[x2];
3873 
3874                 fx += fdx;
3875                 fy += fdy;
3876                 fw += fdw;
3877                 //force increment to avoid /0
3878                 if (!fw)
3879                     fw += fdw;
3880             }
3881 
3882             convert(buf1, len * 2);
3883             convert(buf2, len * 2);
3884 
3885             for (int i = 0; i < len; ++i) {
3886                 int distx = distxs[i];
3887                 int disty = distys[i];
3888                 b[i] = interpolate_4_pixels_rgb64(buf1 + i*2, buf2 + i*2, distx, disty);
3889             }
3890 
3891             length -= len;
3892             b += len;
3893         }
3894     }
3895     return buffer;
3896 }
3897 
3898 template<TextureBlendType blendType>
fetchTransformedBilinear64(QRgba64 * buffer,const Operator *,const QSpanData * data,int y,int x,int length)3899 static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, const Operator *,
3900                                                              const QSpanData *data, int y, int x, int length)
3901 {
3902     if (qPixelLayouts[data->texture.format].bpp == QPixelLayout::BPP64)
3903         return fetchTransformedBilinear64_uint64<blendType>(buffer, data, y, x, length);
3904     return fetchTransformedBilinear64_uint32<blendType>(buffer, data, y, x, length);
3905 }
3906 #endif
3907 
// Per-format fetch functions for untransformed sources. The table has one
// slot per QImage format (QImage::NImageFormats entries, in the order given by
// the trailing comments) and is what getSourceFetch() returns from for the
// BlendUntransformed and BlendTiled blend types. The Invalid slot is nullptr.
// The table is not const.
// FetchUntransformed can have more specialized methods added depending on SIMD features.
static SourceFetchProc sourceFetchUntransformed[QImage::NImageFormats] = {
    nullptr,                    // Invalid
    fetchUntransformed,         // Mono
    fetchUntransformed,         // MonoLsb
    fetchUntransformed,         // Indexed8
    fetchUntransformedARGB32PM, // RGB32
    fetchUntransformed,         // ARGB32
    fetchUntransformedARGB32PM, // ARGB32_Premultiplied
    fetchUntransformedRGB16,    // RGB16
    fetchUntransformed,         // ARGB8565_Premultiplied
    fetchUntransformed,         // RGB666
    fetchUntransformed,         // ARGB6666_Premultiplied
    fetchUntransformed,         // RGB555
    fetchUntransformed,         // ARGB8555_Premultiplied
    fetchUntransformed,         // RGB888
    fetchUntransformed,         // RGB444
    fetchUntransformed,         // ARGB4444_Premultiplied
    fetchUntransformed,         // RGBX8888
    fetchUntransformed,         // RGBA8888
    fetchUntransformed,         // RGBA8888_Premultiplied
    fetchUntransformed,         // Format_BGR30
    fetchUntransformed,         // Format_A2BGR30_Premultiplied
    fetchUntransformed,         // Format_RGB30
    fetchUntransformed,         // Format_A2RGB30_Premultiplied
    fetchUntransformed,         // Alpha8
    fetchUntransformed,         // Grayscale8
    fetchUntransformed,         // RGBX64
    fetchUntransformed,         // RGBA64
    fetchUntransformed,         // RGBA64_Premultiplied
    fetchUntransformed,         // Grayscale16
    fetchUntransformed,         // BGR888
};
3941 
// Fetch functions indexed by TextureBlendType, instantiated with
// QPixelLayout::BPPNone. getSourceFetch() falls back to this table when none
// of its more specific tables applies to the format.
static const SourceFetchProc sourceFetchGeneric[NBlendTypes] = {
    fetchUntransformed,                                                             // Untransformed
    fetchUntransformed,                                                             // Tiled
    fetchTransformed<BlendTransformed, QPixelLayout::BPPNone>,                      // Transformed
    fetchTransformed<BlendTransformedTiled, QPixelLayout::BPPNone>,                 // TransformedTiled
    fetchTransformedBilinear<BlendTransformedBilinear, QPixelLayout::BPPNone>,      // TransformedBilinear
    fetchTransformedBilinear<BlendTransformedBilinearTiled, QPixelLayout::BPPNone>  // TransformedBilinearTiled
};
3950 
// Fetch functions indexed by TextureBlendType that getSourceFetch() uses for
// QImage::Format_RGB32 and QImage::Format_ARGB32_Premultiplied sources.
// The table is not const.
static SourceFetchProc sourceFetchARGB32PM[NBlendTypes] = {
    fetchUntransformedARGB32PM,                                     // Untransformed
    fetchUntransformedARGB32PM,                                     // Tiled
    fetchTransformed<BlendTransformed, QPixelLayout::BPP32>,        // Transformed
    fetchTransformed<BlendTransformedTiled, QPixelLayout::BPP32>,   // TransformedTiled
    fetchTransformedBilinearARGB32PM<BlendTransformedBilinear>,     // Bilinear
    fetchTransformedBilinearARGB32PM<BlendTransformedBilinearTiled> // BilinearTiled
};
3959 
// Fetch functions indexed by TextureBlendType, instantiated for
// QPixelLayout::BPP16. getSourceFetch() selects this table when the pixel
// layout of the format reports BPP16; the untransformed blend types are
// already handled before that check. The table is not const.
static SourceFetchProc sourceFetchAny16[NBlendTypes] = {
    fetchUntransformed,                                                             // Untransformed
    fetchUntransformed,                                                             // Tiled
    fetchTransformed<BlendTransformed, QPixelLayout::BPP16>,                        // Transformed
    fetchTransformed<BlendTransformedTiled, QPixelLayout::BPP16>,                   // TransformedTiled
    fetchTransformedBilinear<BlendTransformedBilinear, QPixelLayout::BPP16>,        // TransformedBilinear
    fetchTransformedBilinear<BlendTransformedBilinearTiled, QPixelLayout::BPP16>    // TransformedBilinearTiled
};
3968 
// Fetch functions indexed by TextureBlendType, instantiated for
// QPixelLayout::BPP32. getSourceFetch() selects this table when the pixel
// layout of the format reports BPP32 and the format is not one of those
// served by sourceFetchARGB32PM. The table is not const.
static SourceFetchProc sourceFetchAny32[NBlendTypes] = {
    fetchUntransformed,                                                             // Untransformed
    fetchUntransformed,                                                             // Tiled
    fetchTransformed<BlendTransformed, QPixelLayout::BPP32>,                        // Transformed
    fetchTransformed<BlendTransformedTiled, QPixelLayout::BPP32>,                   // TransformedTiled
    fetchTransformedBilinear<BlendTransformedBilinear, QPixelLayout::BPP32>,        // TransformedBilinear
    fetchTransformedBilinear<BlendTransformedBilinearTiled, QPixelLayout::BPP32>    // TransformedBilinearTiled
};
3977 
getSourceFetch(TextureBlendType blendType,QImage::Format format)3978 static inline SourceFetchProc getSourceFetch(TextureBlendType blendType, QImage::Format format)
3979 {
3980     if (format == QImage::Format_RGB32 || format == QImage::Format_ARGB32_Premultiplied)
3981         return sourceFetchARGB32PM[blendType];
3982     if (blendType == BlendUntransformed || blendType == BlendTiled)
3983         return sourceFetchUntransformed[format];
3984     if (qPixelLayouts[format].bpp == QPixelLayout::BPP16)
3985         return sourceFetchAny16[blendType];
3986     if (qPixelLayouts[format].bpp == QPixelLayout::BPP32)
3987         return sourceFetchAny32[blendType];
3988     return sourceFetchGeneric[blendType];
3989 }
3990 
3991 #if QT_CONFIG(raster_64bit)
// 64-bit fetch functions indexed by TextureBlendType. getSourceFetch64()
// returns from this table for every format that is not served by
// sourceFetchRGBA64PM.
static const SourceFetchProc64 sourceFetchGeneric64[NBlendTypes] = {
    fetchUntransformed64,                                     // Untransformed
    fetchUntransformed64,                                     // Tiled
    fetchTransformed64<BlendTransformed>,                     // Transformed
    fetchTransformed64<BlendTransformedTiled>,                // TransformedTiled
    fetchTransformedBilinear64<BlendTransformedBilinear>,     // Bilinear
    fetchTransformedBilinear64<BlendTransformedBilinearTiled> // BilinearTiled
};
4000 
// 64-bit fetch functions indexed by TextureBlendType that getSourceFetch64()
// uses for QImage::Format_RGBX64 and QImage::Format_RGBA64_Premultiplied.
// Only the two untransformed entries differ from sourceFetchGeneric64.
static const SourceFetchProc64 sourceFetchRGBA64PM[NBlendTypes] = {
    fetchUntransformedRGBA64PM,                               // Untransformed
    fetchUntransformedRGBA64PM,                               // Tiled
    fetchTransformed64<BlendTransformed>,                     // Transformed
    fetchTransformed64<BlendTransformedTiled>,                // TransformedTiled
    fetchTransformedBilinear64<BlendTransformedBilinear>,     // Bilinear
    fetchTransformedBilinear64<BlendTransformedBilinearTiled> // BilinearTiled
};
4009 
getSourceFetch64(TextureBlendType blendType,QImage::Format format)4010 static inline SourceFetchProc64 getSourceFetch64(TextureBlendType blendType, QImage::Format format)
4011 {
4012     if (format == QImage::Format_RGBX64 || format == QImage::Format_RGBA64_Premultiplied)
4013         return sourceFetchRGBA64PM[blendType];
4014     return sourceFetchGeneric64[blendType];
4015 }
4016 #endif
4017 
4018 
// Fixed-point format for gradient positions: FIXPT_BITS fractional bits, so
// FIXPT_SIZE is the fixed-point representation of 1.
#define FIXPT_BITS 8
#define FIXPT_SIZE (1<<FIXPT_BITS)
4021 
qt_gradient_pixel_fixed(const QGradientData * data,int fixed_pos)4022 static uint qt_gradient_pixel_fixed(const QGradientData *data, int fixed_pos)
4023 {
4024     int ipos = (fixed_pos + (FIXPT_SIZE / 2)) >> FIXPT_BITS;
4025     return data->colorTable32[qt_gradient_clamp(data, ipos)];
4026 }
4027 
4028 #if QT_CONFIG(raster_64bit)
qt_gradient_pixel64_fixed(const QGradientData * data,int fixed_pos)4029 static const QRgba64& qt_gradient_pixel64_fixed(const QGradientData *data, int fixed_pos)
4030 {
4031     int ipos = (fixed_pos + (FIXPT_SIZE / 2)) >> FIXPT_BITS;
4032     return data->colorTable64[qt_gradient_clamp(data, ipos)];
4033 }
4034 #endif
4035 
getLinearGradientValues(LinearGradientValues * v,const QSpanData * data)4036 static void QT_FASTCALL getLinearGradientValues(LinearGradientValues *v, const QSpanData *data)
4037 {
4038     v->dx = data->gradient.linear.end.x - data->gradient.linear.origin.x;
4039     v->dy = data->gradient.linear.end.y - data->gradient.linear.origin.y;
4040     v->l = v->dx * v->dx + v->dy * v->dy;
4041     v->off = 0;
4042     if (v->l != 0) {
4043         v->dx /= v->l;
4044         v->dy /= v->l;
4045         v->off = -v->dx * data->gradient.linear.origin.x - v->dy * data->gradient.linear.origin.y;
4046     }
4047 }
4048 
4049 class GradientBase32
4050 {
4051 public:
4052     typedef uint Type;
null()4053     static Type null() { return 0; }
fetchSingle(const QGradientData & gradient,qreal v)4054     static Type fetchSingle(const QGradientData& gradient, qreal v)
4055     {
4056         return qt_gradient_pixel(&gradient, v);
4057     }
fetchSingle(const QGradientData & gradient,int v)4058     static Type fetchSingle(const QGradientData& gradient, int v)
4059     {
4060         return qt_gradient_pixel_fixed(&gradient, v);
4061     }
memfill(Type * buffer,Type fill,int length)4062     static void memfill(Type *buffer, Type fill, int length)
4063     {
4064         qt_memfill32(buffer, fill, length);
4065     }
4066 };
4067 
4068 #if QT_CONFIG(raster_64bit)
4069 class GradientBase64
4070 {
4071 public:
4072     typedef QRgba64 Type;
null()4073     static Type null() { return QRgba64::fromRgba64(0); }
fetchSingle(const QGradientData & gradient,qreal v)4074     static Type fetchSingle(const QGradientData& gradient, qreal v)
4075     {
4076         return qt_gradient_pixel64(&gradient, v);
4077     }
fetchSingle(const QGradientData & gradient,int v)4078     static Type fetchSingle(const QGradientData& gradient, int v)
4079     {
4080         return qt_gradient_pixel64_fixed(&gradient, v);
4081     }
memfill(Type * buffer,Type fill,int length)4082     static void memfill(Type *buffer, Type fill, int length)
4083     {
4084         qt_memfill64((quint64*)buffer, fill, length);
4085     }
4086 };
4087 #endif
4088 
4089 template<class GradientBase, typename BlendType>
qt_fetch_linear_gradient_template(BlendType * buffer,const Operator * op,const QSpanData * data,int y,int x,int length)4090 static inline const BlendType * QT_FASTCALL qt_fetch_linear_gradient_template(
4091         BlendType *buffer, const Operator *op, const QSpanData *data,
4092         int y, int x, int length)
4093 {
4094     const BlendType *b = buffer;
4095     qreal t, inc;
4096 
4097     bool affine = true;
4098     qreal rx=0, ry=0;
4099     if (op->linear.l == 0) {
4100         t = inc = 0;
4101     } else {
4102         rx = data->m21 * (y + qreal(0.5)) + data->m11 * (x + qreal(0.5)) + data->dx;
4103         ry = data->m22 * (y + qreal(0.5)) + data->m12 * (x + qreal(0.5)) + data->dy;
4104         t = op->linear.dx*rx + op->linear.dy*ry + op->linear.off;
4105         inc = op->linear.dx * data->m11 + op->linear.dy * data->m12;
4106         affine = !data->m13 && !data->m23;
4107 
4108         if (affine) {
4109             t *= (GRADIENT_STOPTABLE_SIZE - 1);
4110             inc *= (GRADIENT_STOPTABLE_SIZE - 1);
4111         }
4112     }
4113 
4114     const BlendType *end = buffer + length;
4115     if (affine) {
4116         if (inc > qreal(-1e-5) && inc < qreal(1e-5)) {
4117             GradientBase::memfill(buffer, GradientBase::fetchSingle(data->gradient, int(t * FIXPT_SIZE)), length);
4118         } else {
4119             if (t+inc*length < qreal(INT_MAX >> (FIXPT_BITS + 1)) &&
4120                 t+inc*length > qreal(INT_MIN >> (FIXPT_BITS + 1))) {
4121                 // we can use fixed point math
4122                 int t_fixed = int(t * FIXPT_SIZE);
4123                 int inc_fixed = int(inc * FIXPT_SIZE);
4124                 while (buffer < end) {
4125                     *buffer = GradientBase::fetchSingle(data->gradient, t_fixed);
4126                     t_fixed += inc_fixed;
4127                     ++buffer;
4128                 }
4129             } else {
4130                 // we have to fall back to float math
4131                 while (buffer < end) {
4132                     *buffer = GradientBase::fetchSingle(data->gradient, t/GRADIENT_STOPTABLE_SIZE);
4133                     t += inc;
4134                     ++buffer;
4135                 }
4136             }
4137         }
4138     } else { // fall back to float math here as well
4139         qreal rw = data->m23 * (y + qreal(0.5)) + data->m13 * (x + qreal(0.5)) + data->m33;
4140         while (buffer < end) {
4141             qreal x = rx/rw;
4142             qreal y = ry/rw;
4143             t = (op->linear.dx*x + op->linear.dy *y) + op->linear.off;
4144 
4145             *buffer = GradientBase::fetchSingle(data->gradient, t);
4146             rx += data->m11;
4147             ry += data->m12;
4148             rw += data->m13;
4149             if (!rw) {
4150                 rw += data->m13;
4151             }
4152             ++buffer;
4153         }
4154     }
4155 
4156     return b;
4157 }
4158 
qt_fetch_linear_gradient(uint * buffer,const Operator * op,const QSpanData * data,int y,int x,int length)4159 static const uint * QT_FASTCALL qt_fetch_linear_gradient(uint *buffer, const Operator *op, const QSpanData *data,
4160                                                          int y, int x, int length)
4161 {
4162     return qt_fetch_linear_gradient_template<GradientBase32, uint>(buffer, op, data, y, x, length);
4163 }
4164 
4165 #if QT_CONFIG(raster_64bit)
qt_fetch_linear_gradient_rgb64(QRgba64 * buffer,const Operator * op,const QSpanData * data,int y,int x,int length)4166 static const QRgba64 * QT_FASTCALL qt_fetch_linear_gradient_rgb64(QRgba64 *buffer, const Operator *op, const QSpanData *data,
4167                                                                  int y, int x, int length)
4168 {
4169     return qt_fetch_linear_gradient_template<GradientBase64, QRgba64>(buffer, op, data, y, x, length);
4170 }
4171 #endif
4172 
getRadialGradientValues(RadialGradientValues * v,const QSpanData * data)4173 static void QT_FASTCALL getRadialGradientValues(RadialGradientValues *v, const QSpanData *data)
4174 {
4175     v->dx = data->gradient.radial.center.x - data->gradient.radial.focal.x;
4176     v->dy = data->gradient.radial.center.y - data->gradient.radial.focal.y;
4177 
4178     v->dr = data->gradient.radial.center.radius - data->gradient.radial.focal.radius;
4179     v->sqrfr = data->gradient.radial.focal.radius * data->gradient.radial.focal.radius;
4180 
4181     v->a = v->dr * v->dr - v->dx*v->dx - v->dy*v->dy;
4182     v->inv2a = 1 / (2 * v->a);
4183 
4184     v->extended = !qFuzzyIsNull(data->gradient.radial.focal.radius) || v->a <= 0;
4185 }
4186 
4187 template <class GradientBase>
4188 class RadialFetchPlain : public GradientBase
4189 {
4190 public:
4191     typedef typename GradientBase::Type BlendType;
fetch(BlendType * buffer,BlendType * end,const Operator * op,const QSpanData * data,qreal det,qreal delta_det,qreal delta_delta_det,qreal b,qreal delta_b)4192     static void fetch(BlendType *buffer, BlendType *end,
4193                       const Operator *op, const QSpanData *data, qreal det,
4194                       qreal delta_det, qreal delta_delta_det, qreal b, qreal delta_b)
4195     {
4196         if (op->radial.extended) {
4197             while (buffer < end) {
4198                 BlendType result = GradientBase::null();
4199                 if (det >= 0) {
4200                     qreal w = qSqrt(det) - b;
4201                     if (data->gradient.radial.focal.radius + op->radial.dr * w >= 0)
4202                         result = GradientBase::fetchSingle(data->gradient, w);
4203                 }
4204 
4205                 *buffer = result;
4206 
4207                 det += delta_det;
4208                 delta_det += delta_delta_det;
4209                 b += delta_b;
4210 
4211                 ++buffer;
4212             }
4213         } else {
4214             while (buffer < end) {
4215                 *buffer++ = GradientBase::fetchSingle(data->gradient, qSqrt(det) - b);
4216 
4217                 det += delta_det;
4218                 delta_det += delta_delta_det;
4219                 b += delta_b;
4220             }
4221         }
4222     }
4223 };
4224 
qt_fetch_radial_gradient_plain(uint * buffer,const Operator * op,const QSpanData * data,int y,int x,int length)4225 const uint * QT_FASTCALL qt_fetch_radial_gradient_plain(uint *buffer, const Operator *op, const QSpanData *data,
4226                                                         int y, int x, int length)
4227 {
4228     return qt_fetch_radial_gradient_template<RadialFetchPlain<GradientBase32>, uint>(buffer, op, data, y, x, length);
4229 }
4230 
// Radial gradient fetch function that getOperator() installs as op.srcFetch.
// Initialized to the plain implementation; the pointer is not const, so it can
// be reassigned (presumably to an optimized variant - confirm at the
// assignment sites).
static SourceFetchProc qt_fetch_radial_gradient = qt_fetch_radial_gradient_plain;
4232 
4233 #if QT_CONFIG(raster_64bit)
qt_fetch_radial_gradient_rgb64(QRgba64 * buffer,const Operator * op,const QSpanData * data,int y,int x,int length)4234 const QRgba64 * QT_FASTCALL qt_fetch_radial_gradient_rgb64(QRgba64 *buffer, const Operator *op, const QSpanData *data,
4235                                                         int y, int x, int length)
4236 {
4237     return qt_fetch_radial_gradient_template<RadialFetchPlain<GradientBase64>, QRgba64>(buffer, op, data, y, x, length);
4238 }
4239 #endif
4240 
4241 template <class GradientBase, typename BlendType>
qt_fetch_conical_gradient_template(BlendType * buffer,const QSpanData * data,int y,int x,int length)4242 static inline const BlendType * QT_FASTCALL qt_fetch_conical_gradient_template(
4243         BlendType *buffer, const QSpanData *data,
4244         int y, int x, int length)
4245 {
4246     const BlendType *b = buffer;
4247     qreal rx = data->m21 * (y + qreal(0.5))
4248                + data->dx + data->m11 * (x + qreal(0.5));
4249     qreal ry = data->m22 * (y + qreal(0.5))
4250                + data->dy + data->m12 * (x + qreal(0.5));
4251     bool affine = !data->m13 && !data->m23;
4252 
4253     const qreal inv2pi = M_1_PI / 2.0;
4254 
4255     const BlendType *end = buffer + length;
4256     if (affine) {
4257         rx -= data->gradient.conical.center.x;
4258         ry -= data->gradient.conical.center.y;
4259         while (buffer < end) {
4260             qreal angle = qAtan2(ry, rx) + data->gradient.conical.angle;
4261 
4262             *buffer = GradientBase::fetchSingle(data->gradient, 1 - angle * inv2pi);
4263 
4264             rx += data->m11;
4265             ry += data->m12;
4266             ++buffer;
4267         }
4268     } else {
4269         qreal rw = data->m23 * (y + qreal(0.5))
4270                    + data->m33 + data->m13 * (x + qreal(0.5));
4271         if (!rw)
4272             rw = 1;
4273         while (buffer < end) {
4274             qreal angle = qAtan2(ry/rw - data->gradient.conical.center.x,
4275                                 rx/rw - data->gradient.conical.center.y)
4276                           + data->gradient.conical.angle;
4277 
4278             *buffer = GradientBase::fetchSingle(data->gradient, 1 - angle * inv2pi);
4279 
4280             rx += data->m11;
4281             ry += data->m12;
4282             rw += data->m13;
4283             if (!rw) {
4284                 rw += data->m13;
4285             }
4286             ++buffer;
4287         }
4288     }
4289     return b;
4290 }
4291 
qt_fetch_conical_gradient(uint * buffer,const Operator *,const QSpanData * data,int y,int x,int length)4292 static const uint * QT_FASTCALL qt_fetch_conical_gradient(uint *buffer, const Operator *, const QSpanData *data,
4293                                                           int y, int x, int length)
4294 {
4295     return qt_fetch_conical_gradient_template<GradientBase32, uint>(buffer, data, y, x, length);
4296 }
4297 
4298 #if QT_CONFIG(raster_64bit)
qt_fetch_conical_gradient_rgb64(QRgba64 * buffer,const Operator *,const QSpanData * data,int y,int x,int length)4299 static const QRgba64 * QT_FASTCALL qt_fetch_conical_gradient_rgb64(QRgba64 *buffer, const Operator *, const QSpanData *data,
4300                                                                    int y, int x, int length)
4301 {
4302     return qt_fetch_conical_gradient_template<GradientBase64, QRgba64>(buffer, data, y, x, length);
4303 }
4304 #endif
4305 
// Composition function tables for solid sources. The *_C arrays are only
// declared here (extern); their definitions are not in this part of the file.
extern CompositionFunctionSolid qt_functionForModeSolid_C[];
extern CompositionFunctionSolid64 qt_functionForModeSolid64_C[];

// Tables currently in use for solid sources; getOperator() indexes them by
// composition mode. The pointers themselves are not const, only the entries.
static const CompositionFunctionSolid *functionForModeSolid = qt_functionForModeSolid_C;
#if QT_CONFIG(raster_64bit)
static const CompositionFunctionSolid64 *functionForModeSolid64 = qt_functionForModeSolid64_C;
#endif

// Composition function tables for non-solid sources, declared the same way.
extern CompositionFunction qt_functionForMode_C[];
extern CompositionFunction64 qt_functionForMode64_C[];

// Tables currently in use for non-solid sources; getOperator() indexes them
// by composition mode.
static const CompositionFunction *functionForMode = qt_functionForMode_C;
#if QT_CONFIG(raster_64bit)
static const CompositionFunction64 *functionForMode64 = qt_functionForMode64_C;
#endif
4321 
getBlendType(const QSpanData * data)4322 static TextureBlendType getBlendType(const QSpanData *data)
4323 {
4324     TextureBlendType ft;
4325     if (data->txop <= QTransform::TxTranslate)
4326         if (data->texture.type == QTextureData::Tiled)
4327             ft = BlendTiled;
4328         else
4329             ft = BlendUntransformed;
4330     else if (data->bilinear)
4331         if (data->texture.type == QTextureData::Tiled)
4332             ft = BlendTransformedBilinearTiled;
4333         else
4334             ft = BlendTransformedBilinear;
4335     else
4336         if (data->texture.type == QTextureData::Tiled)
4337             ft = BlendTransformedTiled;
4338         else
4339             ft = BlendTransformed;
4340     return ft;
4341 }
4342 
getOperator(const QSpanData * data,const QSpan * spans,int spanCount)4343 static inline Operator getOperator(const QSpanData *data, const QSpan *spans, int spanCount)
4344 {
4345     Operator op;
4346     bool solidSource = false;
4347 
4348     switch(data->type) {
4349     case QSpanData::Solid:
4350         solidSource = data->solidColor.isOpaque();
4351         op.srcFetch = nullptr;
4352 #if QT_CONFIG(raster_64bit)
4353         op.srcFetch64 = nullptr;
4354 #endif
4355         break;
4356     case QSpanData::LinearGradient:
4357         solidSource = !data->gradient.alphaColor;
4358         getLinearGradientValues(&op.linear, data);
4359         op.srcFetch = qt_fetch_linear_gradient;
4360 #if QT_CONFIG(raster_64bit)
4361         op.srcFetch64 = qt_fetch_linear_gradient_rgb64;
4362 #endif
4363         break;
4364     case QSpanData::RadialGradient:
4365         solidSource = !data->gradient.alphaColor;
4366         getRadialGradientValues(&op.radial, data);
4367         op.srcFetch = qt_fetch_radial_gradient;
4368 #if QT_CONFIG(raster_64bit)
4369         op.srcFetch64 = qt_fetch_radial_gradient_rgb64;
4370 #endif
4371         break;
4372     case QSpanData::ConicalGradient:
4373         solidSource = !data->gradient.alphaColor;
4374         op.srcFetch = qt_fetch_conical_gradient;
4375 #if QT_CONFIG(raster_64bit)
4376         op.srcFetch64 = qt_fetch_conical_gradient_rgb64;
4377 #endif
4378         break;
4379     case QSpanData::Texture:
4380         solidSource = !data->texture.hasAlpha;
4381         op.srcFetch = getSourceFetch(getBlendType(data), data->texture.format);
4382 #if QT_CONFIG(raster_64bit)
4383         op.srcFetch64 = getSourceFetch64(getBlendType(data), data->texture.format);;
4384 #endif
4385         break;
4386     default:
4387         Q_UNREACHABLE();
4388         break;
4389     }
4390 #if !QT_CONFIG(raster_64bit)
4391     op.srcFetch64 = 0;
4392 #endif
4393 
4394     op.mode = data->rasterBuffer->compositionMode;
4395     if (op.mode == QPainter::CompositionMode_SourceOver && solidSource)
4396         op.mode = QPainter::CompositionMode_Source;
4397 
4398     op.destFetch = destFetchProc[data->rasterBuffer->format];
4399 #if QT_CONFIG(raster_64bit)
4400     op.destFetch64 = destFetchProc64[data->rasterBuffer->format];
4401 #else
4402     op.destFetch64 = 0;
4403 #endif
4404     if (op.mode == QPainter::CompositionMode_Source &&
4405             (data->type != QSpanData::Texture || data->texture.const_alpha == 256)) {
4406         const QSpan *lastSpan = spans + spanCount;
4407         bool alphaSpans = false;
4408         while (spans < lastSpan) {
4409             if (spans->coverage != 255) {
4410                 alphaSpans = true;
4411                 break;
4412             }
4413             ++spans;
4414         }
4415         if (!alphaSpans && spanCount > 0) {
4416             // If all spans are opaque we do not need to fetch dest.
4417             // But don't clear passthrough destFetch as they are just as fast and save destStore.
4418             if (op.destFetch != destFetchARGB32P)
4419                 op.destFetch = destFetchUndefined;
4420 #if QT_CONFIG(raster_64bit)
4421             if (op.destFetch64 != destFetchRGB64)
4422                 op.destFetch64 = destFetch64Undefined;
4423 #endif
4424         }
4425     }
4426 
4427     op.destStore = destStoreProc[data->rasterBuffer->format];
4428     op.funcSolid = functionForModeSolid[op.mode];
4429     op.func = functionForMode[op.mode];
4430 #if QT_CONFIG(raster_64bit)
4431     op.destStore64 = destStoreProc64[data->rasterBuffer->format];
4432     op.funcSolid64 = functionForModeSolid64[op.mode];
4433     op.func64 = functionForMode64[op.mode];
4434 #else
4435     op.destStore64 = 0;
4436     op.funcSolid64 = 0;
4437     op.func64 = 0;
4438 #endif
4439 
4440     return op;
4441 }
4442 
spanfill_from_first(QRasterBuffer * rasterBuffer,QPixelLayout::BPP bpp,int x,int y,int length)4443 static void spanfill_from_first(QRasterBuffer *rasterBuffer, QPixelLayout::BPP bpp, int x, int y, int length)
4444 {
4445     switch (bpp) {
4446     case QPixelLayout::BPP64: {
4447         quint64 *dest = reinterpret_cast<quint64 *>(rasterBuffer->scanLine(y)) + x;
4448         qt_memfill_template(dest + 1, dest[0], length - 1);
4449         break;
4450     }
4451     case QPixelLayout::BPP32: {
4452         quint32 *dest = reinterpret_cast<quint32 *>(rasterBuffer->scanLine(y)) + x;
4453         qt_memfill_template(dest + 1, dest[0], length - 1);
4454         break;
4455     }
4456     case QPixelLayout::BPP24: {
4457         quint24 *dest = reinterpret_cast<quint24 *>(rasterBuffer->scanLine(y)) + x;
4458         qt_memfill_template(dest + 1, dest[0], length - 1);
4459         break;
4460     }
4461     case QPixelLayout::BPP16: {
4462         quint16 *dest = reinterpret_cast<quint16 *>(rasterBuffer->scanLine(y)) + x;
4463         qt_memfill_template(dest + 1, dest[0], length - 1);
4464         break;
4465     }
4466     case QPixelLayout::BPP8: {
4467         uchar *dest = rasterBuffer->scanLine(y) + x;
4468         memset(dest + 1, dest[0], length - 1);
4469         break;
4470     }
4471     default:
4472         Q_UNREACHABLE();
4473     }
4474 }
4475 
4476 
4477 // -------------------- blend methods ---------------------
4478 
blend_color_generic(int count,const QSpan * spans,void * userData)4479 static void blend_color_generic(int count, const QSpan *spans, void *userData)
4480 {
4481     QSpanData *data = reinterpret_cast<QSpanData *>(userData);
4482     uint buffer[BufferSize];
4483     Operator op = getOperator(data, nullptr, 0);
4484     const uint color = data->solidColor.toArgb32();
4485     const bool solidFill = op.mode == QPainter::CompositionMode_Source;
4486     const QPixelLayout::BPP bpp = qPixelLayouts[data->rasterBuffer->format].bpp;
4487 
4488     while (count--) {
4489         int x = spans->x;
4490         int length = spans->len;
4491         if (solidFill && bpp >= QPixelLayout::BPP8 && spans->coverage == 255 && length) {
4492             // If dest doesn't matter we don't need to bother with blending or converting all the identical pixels
4493             op.destStore(data->rasterBuffer, x, spans->y, &color, 1);
4494             spanfill_from_first(data->rasterBuffer, bpp, x, spans->y, length);
4495             length = 0;
4496         }
4497 
4498         while (length) {
4499             int l = qMin(BufferSize, length);
4500             uint *dest = op.destFetch(buffer, data->rasterBuffer, x, spans->y, l);
4501             op.funcSolid(dest, l, color, spans->coverage);
4502             if (op.destStore)
4503                 op.destStore(data->rasterBuffer, x, spans->y, dest, l);
4504             length -= l;
4505             x += l;
4506         }
4507         ++spans;
4508     }
4509 }
4510 
blend_color_argb(int count,const QSpan * spans,void * userData)4511 static void blend_color_argb(int count, const QSpan *spans, void *userData)
4512 {
4513     QSpanData *data = reinterpret_cast<QSpanData *>(userData);
4514 
4515     const Operator op = getOperator(data, nullptr, 0);
4516     const uint color = data->solidColor.toArgb32();
4517 
4518     if (op.mode == QPainter::CompositionMode_Source) {
4519         // inline for performance
4520         while (count--) {
4521             uint *target = ((uint *)data->rasterBuffer->scanLine(spans->y)) + spans->x;
4522             if (spans->coverage == 255) {
4523                 qt_memfill(target, color, spans->len);
4524 #ifdef __SSE2__
4525             } else if (spans->len > 16) {
4526                 op.funcSolid(target, spans->len, color, spans->coverage);
4527 #endif
4528             } else {
4529                 uint c = BYTE_MUL(color, spans->coverage);
4530                 int ialpha = 255 - spans->coverage;
4531                 for (int i = 0; i < spans->len; ++i)
4532                     target[i] = c + BYTE_MUL(target[i], ialpha);
4533             }
4534             ++spans;
4535         }
4536         return;
4537     }
4538 
4539     while (count--) {
4540         uint *target = ((uint *)data->rasterBuffer->scanLine(spans->y)) + spans->x;
4541         op.funcSolid(target, spans->len, color, spans->coverage);
4542         ++spans;
4543     }
4544 }
4545 
blend_color_generic_rgb64(int count,const QSpan * spans,void * userData)4546 void blend_color_generic_rgb64(int count, const QSpan *spans, void *userData)
4547 {
4548 #if QT_CONFIG(raster_64bit)
4549     QSpanData *data = reinterpret_cast<QSpanData *>(userData);
4550     Operator op = getOperator(data, nullptr, 0);
4551     if (!op.funcSolid64) {
4552         qCDebug(lcQtGuiDrawHelper, "blend_color_generic_rgb64: unsupported 64bit blend attempted, falling back to 32-bit");
4553         return blend_color_generic(count, spans, userData);
4554     }
4555 
4556     alignas(8) QRgba64 buffer[BufferSize];
4557     const QRgba64 color = data->solidColor;
4558     const bool solidFill = op.mode == QPainter::CompositionMode_Source;
4559     const QPixelLayout::BPP bpp = qPixelLayouts[data->rasterBuffer->format].bpp;
4560 
4561     while (count--) {
4562         int x = spans->x;
4563         int length = spans->len;
4564         if (solidFill && bpp >= QPixelLayout::BPP8 && spans->coverage == 255 && length && op.destStore64) {
4565             // If dest doesn't matter we don't need to bother with blending or converting all the identical pixels
4566             op.destStore64(data->rasterBuffer, x, spans->y, &color, 1);
4567             spanfill_from_first(data->rasterBuffer, bpp, x, spans->y, length);
4568             length = 0;
4569         }
4570 
4571         while (length) {
4572             int l = qMin(BufferSize, length);
4573             QRgba64 *dest = op.destFetch64(buffer, data->rasterBuffer, x, spans->y, l);
4574             op.funcSolid64(dest, l, color, spans->coverage);
4575             if (op.destStore64)
4576                 op.destStore64(data->rasterBuffer, x, spans->y, dest, l);
4577             length -= l;
4578             x += l;
4579         }
4580         ++spans;
4581     }
4582 #else
4583     blend_color_generic(count, spans, userData);
4584 #endif
4585 }
4586 
// Fills the spans with the solid color on a destination addressed as 16-bit
// pixels. Source and SourceOver are handled inline; every other composition
// mode is forwarded to blend_color_generic().
static void blend_color_rgb16(int count, const QSpan *spans, void *userData)
{
    QSpanData *data = reinterpret_cast<QSpanData *>(userData);

    /*
        We duplicate a little logic from getOperator() and calculate the
        composition mode directly.  This allows blend_color_rgb16 to be used
        from qt_gradient_quint16 with minimal overhead.
     */
    QPainter::CompositionMode mode = data->rasterBuffer->compositionMode;
    // An opaque color drawn with SourceOver gives the same result as Source.
    if (mode == QPainter::CompositionMode_SourceOver && data->solidColor.isOpaque())
        mode = QPainter::CompositionMode_Source;

    if (mode == QPainter::CompositionMode_Source) {
        // inline for performance
        ushort c = data->solidColor.toRgb16();
        for (; count--; spans++) {
            if (!spans->len)
                continue;
            ushort *target = ((ushort *)data->rasterBuffer->scanLine(spans->y)) + spans->x;
            if (spans->coverage == 255) {
                // Fully covered span: plain fill with the converted color.
                qt_memfill(target, c, spans->len);
            } else {
                // Partially covered span: mix the coverage-scaled color with
                // the destination scaled by the remaining (255 - coverage).
                ushort color = BYTE_MUL_RGB16(c, spans->coverage);
                int ialpha = 255 - spans->coverage;
                const ushort *end = target + spans->len;
                while (target < end) {
                    *target = color + BYTE_MUL_RGB16(*target, ialpha);
                    ++target;
                }
            }
        }
        return;
    }

    if (mode == QPainter::CompositionMode_SourceOver) {
        for (; count--; spans++) {
            if (!spans->len)
                continue;
            // Color scaled by the span coverage, and the complement of its
            // alpha channel used to attenuate the destination.
            uint color = BYTE_MUL(data->solidColor.toArgb32(), spans->coverage);
            int ialpha = qAlpha(~color);
            ushort c = qConvertRgb32To16(color);
            ushort *target = ((ushort *)data->rasterBuffer->scanLine(spans->y)) + spans->x;
            int len = spans->len;
            // pre: the span starts on an address that is not 4-byte aligned,
            // so one leading pixel is blended on its own.
            bool pre = (((quintptr)target) & 0x3) != 0;
            // post: an odd pixel is left over after the pairs.
            bool post = false;
            if (pre) {
                // skip to word boundary
                *target = c + BYTE_MUL_RGB16(*target, ialpha);
                ++target;
                --len;
            }
            if (len & 0x1) {
                post = true;
                --len;
            }
            // The remaining even number of pixels is blended two at a time
            // through 32-bit accesses, with the color duplicated in both halves.
            uint *target32 = (uint*)target;
            uint c32 = c | (c<<16);
            len >>= 1;
            // Rescales ialpha from 0..255 to 0..32 for BYTE_MUL_RGB16_32.
            uint salpha = (ialpha+1) >> 3; // calculate here rather than in loop
            while (len--) {
                // blend full words
                *target32 = c32 + BYTE_MUL_RGB16_32(*target32, salpha);
                ++target32;
                // keep the 16-bit pointer in step for the trailing pixel
                target += 2;
            }
            if (post) {
                // one last pixel beyond a full word
                *target = c + BYTE_MUL_RGB16(*target, ialpha);
            }
        }
        return;
    }

    // Any other composition mode: use the generic implementation.
    blend_color_generic(count, spans, userData);
}
4663 
// Drives a span handler over a list of spans.
//
// Runs of spans that are horizontally adjacent on the same scanline are
// merged, and each merged run is processed in chunks of at most BufferSize
// pixels. For every chunk the handler is asked to
//   - fetch(x, y, length): prepare the chunk and return its source pixels
//     (a pointer to const T::BlendType),
//   - process(x, y, len, coverage, src, offset): blend the part of the chunk
//     that belongs to one span, starting offset pixels into the chunk,
//   - store(x, y, length): finish the chunk.
// The coverage passed to process() is the span coverage scaled by the
// texture's const_alpha when the span data is a texture.
template <typename T>
void handleSpans(int count, const QSpan *spans, const QSpanData *data, T &handler)
{
    // 256 leaves the span coverage unchanged after the >> 8 below.
    uint const_alpha = 256;
    if (data->type == QSpanData::Texture)
        const_alpha = data->texture.const_alpha;

    int coverage = 0;
    while (count) {
        // Empty spans are skipped.
        if (!spans->len) {
            ++spans;
            --count;
            continue;
        }
        int x = spans->x;
        const int y = spans->y;
        int right = x + spans->len;

        // compute length of adjacent spans
        for (int i = 1; i < count && spans[i].y == y && spans[i].x == right; ++i)
            right += spans[i].len;
        int length = right - x;

        while (length) {
            // One chunk of the merged run.
            int l = qMin(BufferSize, length);
            length -= l;

            // Remember the chunk start and size; x and l are consumed below.
            int process_length = l;
            int process_x = x;

            const typename T::BlendType *src = handler.fetch(process_x, y, process_length);
            int offset = 0;
            while (l > 0) {
                // The coverage is only recomputed when a span begins; a span
                // continued from the previous chunk keeps its coverage.
                if (x == spans->x) // new span?
                    coverage = (spans->coverage * const_alpha) >> 8;

                // Part of the current span that lies within this chunk.
                // (This 'right' is the end of the current span and shadows
                // the end of the merged run declared above.)
                int right = spans->x + spans->len;
                int len = qMin(l, right - x);

                handler.process(x, y, len, coverage, src, offset);

                l -= len;
                x += len;
                offset += len;

                if (x == right) { // done with current span?
                    ++spans;
                    --count;
                }
            }
            handler.store(process_x, y, process_length);
        }
    }
}
4718 
4719 template<typename T>
4720 struct QBlendBase
4721 {
4722     typedef T BlendType;
QBlendBaseQBlendBase4723     QBlendBase(QSpanData *d, const Operator &o)
4724         : data(d)
4725         , op(o)
4726         , dest(nullptr)
4727     {
4728     }
4729 
4730     QSpanData *data;
4731     Operator op;
4732 
4733     BlendType *dest;
4734 
4735     alignas(8) BlendType buffer[BufferSize];
4736     alignas(8) BlendType src_buffer[BufferSize];
4737 };
4738 
4739 class BlendSrcGeneric : public QBlendBase<uint>
4740 {
4741 public:
BlendSrcGeneric(QSpanData * d,const Operator & o)4742     BlendSrcGeneric(QSpanData *d, const Operator &o)
4743         : QBlendBase<uint>(d, o)
4744     {
4745     }
4746 
fetch(int x,int y,int len)4747     const uint *fetch(int x, int y, int len)
4748     {
4749         dest = op.destFetch(buffer, data->rasterBuffer, x, y, len);
4750         return op.srcFetch(src_buffer, &op, data, y, x, len);
4751     }
4752 
process(int,int,int len,int coverage,const uint * src,int offset)4753     void process(int, int, int len, int coverage, const uint *src, int offset)
4754     {
4755         op.func(dest + offset, src + offset, len, coverage);
4756     }
4757 
store(int x,int y,int len)4758     void store(int x, int y, int len)
4759     {
4760         if (op.destStore)
4761             op.destStore(data->rasterBuffer, x, y, dest, len);
4762     }
4763 };
4764 
4765 #if QT_CONFIG(raster_64bit)
4766 class BlendSrcGenericRGB64 : public QBlendBase<QRgba64>
4767 {
4768 public:
BlendSrcGenericRGB64(QSpanData * d,const Operator & o)4769     BlendSrcGenericRGB64(QSpanData *d, const Operator &o)
4770         : QBlendBase<QRgba64>(d, o)
4771     {
4772     }
4773 
isSupported() const4774     bool isSupported() const
4775     {
4776         return op.func64 && op.destFetch64;
4777     }
4778 
fetch(int x,int y,int len)4779     const QRgba64 *fetch(int x, int y, int len)
4780     {
4781         dest = op.destFetch64(buffer, data->rasterBuffer, x, y, len);
4782         return op.srcFetch64(src_buffer, &op, data, y, x, len);
4783     }
4784 
process(int,int,int len,int coverage,const QRgba64 * src,int offset)4785     void process(int, int, int len, int coverage, const QRgba64 *src, int offset)
4786     {
4787         op.func64(dest + offset, src + offset, len, coverage);
4788     }
4789 
store(int x,int y,int len)4790     void store(int x, int y, int len)
4791     {
4792         if (op.destStore64)
4793             op.destStore64(data->rasterBuffer, x, y, dest, len);
4794     }
4795 };
4796 #endif
4797 
blend_src_generic(int count,const QSpan * spans,void * userData)4798 static void blend_src_generic(int count, const QSpan *spans, void *userData)
4799 {
4800     QSpanData *data = reinterpret_cast<QSpanData *>(userData);
4801     BlendSrcGeneric blend(data, getOperator(data, spans, count));
4802     handleSpans(count, spans, data, blend);
4803 }
4804 
4805 #if QT_CONFIG(raster_64bit)
blend_src_generic_rgb64(int count,const QSpan * spans,void * userData)4806 static void blend_src_generic_rgb64(int count, const QSpan *spans, void *userData)
4807 {
4808     QSpanData *data = reinterpret_cast<QSpanData *>(userData);
4809     Operator op = getOperator(data, spans, count);
4810     BlendSrcGenericRGB64 blend64(data, op);
4811     if (blend64.isSupported())
4812         handleSpans(count, spans, data, blend64);
4813     else {
4814         qCDebug(lcQtGuiDrawHelper, "blend_src_generic_rgb64: unsupported 64-bit blend attempted, falling back to 32-bit");
4815         BlendSrcGeneric blend32(data, op);
4816         handleSpans(count, spans, data, blend32);
4817     }
4818 }
4819 #endif
4820 
blend_untransformed_generic(int count,const QSpan * spans,void * userData)4821 static void blend_untransformed_generic(int count, const QSpan *spans, void *userData)
4822 {
4823     QSpanData *data = reinterpret_cast<QSpanData *>(userData);
4824 
4825     uint buffer[BufferSize];
4826     uint src_buffer[BufferSize];
4827     Operator op = getOperator(data, spans, count);
4828 
4829     const int image_width = data->texture.width;
4830     const int image_height = data->texture.height;
4831     int xoff = -qRound(-data->dx);
4832     int yoff = -qRound(-data->dy);
4833 
4834     for (; count--; spans++) {
4835         if (!spans->len)
4836             continue;
4837         int x = spans->x;
4838         int length = spans->len;
4839         int sx = xoff + x;
4840         int sy = yoff + spans->y;
4841         if (sy >= 0 && sy < image_height && sx < image_width) {
4842             if (sx < 0) {
4843                 x -= sx;
4844                 length += sx;
4845                 sx = 0;
4846             }
4847             if (sx + length > image_width)
4848                 length = image_width - sx;
4849             if (length > 0) {
4850                 const int coverage = (spans->coverage * data->texture.const_alpha) >> 8;
4851                 while (length) {
4852                     int l = qMin(BufferSize, length);
4853                     const uint *src = op.srcFetch(src_buffer, &op, data, sy, sx, l);
4854                     uint *dest = op.destFetch(buffer, data->rasterBuffer, x, spans->y, l);
4855                     op.func(dest, src, l, coverage);
4856                     if (op.destStore)
4857                         op.destStore(data->rasterBuffer, x, spans->y, dest, l);
4858                     x += l;
4859                     sx += l;
4860                     length -= l;
4861                 }
4862             }
4863         }
4864     }
4865 }
4866 
4867 #if QT_CONFIG(raster_64bit)
blend_untransformed_generic_rgb64(int count,const QSpan * spans,void * userData)4868 static void blend_untransformed_generic_rgb64(int count, const QSpan *spans, void *userData)
4869 {
4870     QSpanData *data = reinterpret_cast<QSpanData *>(userData);
4871 
4872     Operator op = getOperator(data, spans, count);
4873     if (!op.func64) {
4874         qCDebug(lcQtGuiDrawHelper, "blend_untransformed_generic_rgb64: unsupported 64-bit blend attempted, falling back to 32-bit");
4875         return blend_untransformed_generic(count, spans, userData);
4876     }
4877     alignas(8) QRgba64 buffer[BufferSize];
4878     alignas(8) QRgba64 src_buffer[BufferSize];
4879 
4880     const int image_width = data->texture.width;
4881     const int image_height = data->texture.height;
4882     int xoff = -qRound(-data->dx);
4883     int yoff = -qRound(-data->dy);
4884 
4885     for (; count--; spans++) {
4886         if (!spans->len)
4887             continue;
4888         int x = spans->x;
4889         int length = spans->len;
4890         int sx = xoff + x;
4891         int sy = yoff + spans->y;
4892         if (sy >= 0 && sy < image_height && sx < image_width) {
4893             if (sx < 0) {
4894                 x -= sx;
4895                 length += sx;
4896                 sx = 0;
4897             }
4898             if (sx + length > image_width)
4899                 length = image_width - sx;
4900             if (length > 0) {
4901                 const int coverage = (spans->coverage * data->texture.const_alpha) >> 8;
4902                 while (length) {
4903                     int l = qMin(BufferSize, length);
4904                     const QRgba64 *src = op.srcFetch64(src_buffer, &op, data, sy, sx, l);
4905                     QRgba64 *dest = op.destFetch64(buffer, data->rasterBuffer, x, spans->y, l);
4906                     op.func64(dest, src, l, coverage);
4907                     if (op.destStore64)
4908                         op.destStore64(data->rasterBuffer, x, spans->y, dest, l);
4909                     x += l;
4910                     sx += l;
4911                     length -= l;
4912                 }
4913             }
4914         }
4915     }
4916 }
4917 #endif
4918 
blend_untransformed_argb(int count,const QSpan * spans,void * userData)4919 static void blend_untransformed_argb(int count, const QSpan *spans, void *userData)
4920 {
4921     QSpanData *data = reinterpret_cast<QSpanData *>(userData);
4922     if (data->texture.format != QImage::Format_ARGB32_Premultiplied
4923         && data->texture.format != QImage::Format_RGB32) {
4924         blend_untransformed_generic(count, spans, userData);
4925         return;
4926     }
4927 
4928     Operator op = getOperator(data, spans, count);
4929 
4930     const int image_width = data->texture.width;
4931     const int image_height = data->texture.height;
4932     int xoff = -qRound(-data->dx);
4933     int yoff = -qRound(-data->dy);
4934 
4935     for (; count--; spans++) {
4936         if (!spans->len)
4937             continue;
4938         int x = spans->x;
4939         int length = spans->len;
4940         int sx = xoff + x;
4941         int sy = yoff + spans->y;
4942         if (sy >= 0 && sy < image_height && sx < image_width) {
4943             if (sx < 0) {
4944                 x -= sx;
4945                 length += sx;
4946                 sx = 0;
4947             }
4948             if (sx + length > image_width)
4949                 length = image_width - sx;
4950             if (length > 0) {
4951                 const int coverage = (spans->coverage * data->texture.const_alpha) >> 8;
4952                 const uint *src = (const uint *)data->texture.scanLine(sy) + sx;
4953                 uint *dest = ((uint *)data->rasterBuffer->scanLine(spans->y)) + x;
4954                 op.func(dest, src, length, coverage);
4955             }
4956         }
4957     }
4958 }
4959 
interpolate_pixel_rgb16_255(quint16 x,quint8 a,quint16 y,quint8 b)4960 static inline quint16 interpolate_pixel_rgb16_255(quint16 x, quint8 a,
4961                                                   quint16 y, quint8 b)
4962 {
4963     quint16 t = ((((x & 0x07e0) * a) + ((y & 0x07e0) * b)) >> 5) & 0x07e0;
4964     t |= ((((x & 0xf81f) * a) + ((y & 0xf81f) * b)) >> 5) & 0xf81f;
4965 
4966     return t;
4967 }
4968 
interpolate_pixel_rgb16x2_255(quint32 x,quint8 a,quint32 y,quint8 b)4969 static inline quint32 interpolate_pixel_rgb16x2_255(quint32 x, quint8 a,
4970                                                     quint32 y, quint8 b)
4971 {
4972     uint t;
4973     t = ((((x & 0xf81f07e0) >> 5) * a) + (((y & 0xf81f07e0) >> 5) * b)) & 0xf81f07e0;
4974     t |= ((((x & 0x07e0f81f) * a) + ((y & 0x07e0f81f) * b)) >> 5) & 0x07e0f81f;
4975     return t;
4976 }
4977 
blend_sourceOver_rgb16_rgb16(quint16 * Q_DECL_RESTRICT dest,const quint16 * Q_DECL_RESTRICT src,int length,const quint8 alpha,const quint8 ialpha)4978 static inline void blend_sourceOver_rgb16_rgb16(quint16 *Q_DECL_RESTRICT dest,
4979                                                 const quint16 *Q_DECL_RESTRICT src,
4980                                                 int length,
4981                                                 const quint8 alpha,
4982                                                 const quint8 ialpha)
4983 {
4984     const int dstAlign = ((quintptr)dest) & 0x3;
4985     if (dstAlign) {
4986         *dest = interpolate_pixel_rgb16_255(*src, alpha, *dest, ialpha);
4987         ++dest;
4988         ++src;
4989         --length;
4990     }
4991     const int srcAlign = ((quintptr)src) & 0x3;
4992     int length32 = length >> 1;
4993     if (length32 && srcAlign == 0) {
4994         while (length32--) {
4995             const quint32 *src32 = reinterpret_cast<const quint32*>(src);
4996             quint32 *dest32 = reinterpret_cast<quint32*>(dest);
4997             *dest32 = interpolate_pixel_rgb16x2_255(*src32, alpha,
4998                                                     *dest32, ialpha);
4999             dest += 2;
5000             src += 2;
5001         }
5002         length &= 0x1;
5003     }
5004     while (length--) {
5005         *dest = interpolate_pixel_rgb16_255(*src, alpha, *dest, ialpha);
5006         ++dest;
5007         ++src;
5008     }
5009 }
5010 
blend_untransformed_rgb565(int count,const QSpan * spans,void * userData)5011 static void blend_untransformed_rgb565(int count, const QSpan *spans, void *userData)
5012 {
5013     QSpanData *data = reinterpret_cast<QSpanData*>(userData);
5014     QPainter::CompositionMode mode = data->rasterBuffer->compositionMode;
5015 
5016     if (data->texture.format != QImage::Format_RGB16
5017             || (mode != QPainter::CompositionMode_SourceOver
5018                 && mode != QPainter::CompositionMode_Source))
5019     {
5020         blend_untransformed_generic(count, spans, userData);
5021         return;
5022     }
5023 
5024     const int image_width = data->texture.width;
5025     const int image_height = data->texture.height;
5026     int xoff = -qRound(-data->dx);
5027     int yoff = -qRound(-data->dy);
5028 
5029     const QSpan *end = spans + count;
5030     while (spans < end) {
5031         if (!spans->len) {
5032             ++spans;
5033             continue;
5034         }
5035         const quint8 coverage = (data->texture.const_alpha * spans->coverage) >> 8;
5036         if (coverage == 0) {
5037             ++spans;
5038             continue;
5039         }
5040 
5041         int x = spans->x;
5042         int length = spans->len;
5043         int sx = xoff + x;
5044         int sy = yoff + spans->y;
5045         if (sy >= 0 && sy < image_height && sx < image_width) {
5046             if (sx < 0) {
5047                 x -= sx;
5048                 length += sx;
5049                 sx = 0;
5050             }
5051             if (sx + length > image_width)
5052                 length = image_width - sx;
5053             if (length > 0) {
5054                 quint16 *dest = (quint16 *)data->rasterBuffer->scanLine(spans->y) + x;
5055                 const quint16 *src = (const quint16 *)data->texture.scanLine(sy) + sx;
5056                 if (coverage == 255) {
5057                     memcpy(dest, src, length * sizeof(quint16));
5058                 } else {
5059                     const quint8 alpha = (coverage + 1) >> 3;
5060                     const quint8 ialpha = 0x20 - alpha;
5061                     if (alpha > 0)
5062                         blend_sourceOver_rgb16_rgb16(dest, src, length, alpha, ialpha);
5063                 }
5064             }
5065         }
5066         ++spans;
5067     }
5068 }
5069 
blend_tiled_generic(int count,const QSpan * spans,void * userData)5070 static void blend_tiled_generic(int count, const QSpan *spans, void *userData)
5071 {
5072     QSpanData *data = reinterpret_cast<QSpanData *>(userData);
5073 
5074     uint buffer[BufferSize];
5075     uint src_buffer[BufferSize];
5076     Operator op = getOperator(data, spans, count);
5077 
5078     const int image_width = data->texture.width;
5079     const int image_height = data->texture.height;
5080     int xoff = -qRound(-data->dx) % image_width;
5081     int yoff = -qRound(-data->dy) % image_height;
5082 
5083     if (xoff < 0)
5084         xoff += image_width;
5085     if (yoff < 0)
5086         yoff += image_height;
5087 
5088     while (count--) {
5089         int x = spans->x;
5090         int length = spans->len;
5091         int sx = (xoff + spans->x) % image_width;
5092         int sy = (spans->y + yoff) % image_height;
5093         if (sx < 0)
5094             sx += image_width;
5095         if (sy < 0)
5096             sy += image_height;
5097 
5098         const int coverage = (spans->coverage * data->texture.const_alpha) >> 8;
5099         while (length) {
5100             int l = qMin(image_width - sx, length);
5101             if (BufferSize < l)
5102                 l = BufferSize;
5103             const uint *src = op.srcFetch(src_buffer, &op, data, sy, sx, l);
5104             uint *dest = op.destFetch(buffer, data->rasterBuffer, x, spans->y, l);
5105             op.func(dest, src, l, coverage);
5106             if (op.destStore)
5107                 op.destStore(data->rasterBuffer, x, spans->y, dest, l);
5108             x += l;
5109             sx += l;
5110             length -= l;
5111             if (sx >= image_width)
5112                 sx = 0;
5113         }
5114         ++spans;
5115     }
5116 }
5117 
5118 #if QT_CONFIG(raster_64bit)
// 64-bit (QRgba64) variant of blend_tiled_generic(). Falls back to the 32-bit
// implementation when the operator has no 64-bit blend function.
//
// When the destination does not take part in the result (the operator's
// 64-bit destination fetch is destFetch64Undefined), the tile is at most
// BufferSize pixels wide and the destination has 32 or 64 bits per pixel,
// one tile period is converted and stored per span and the rest of the span
// is filled by copying destination pixels. Otherwise each span is blended
// piece by piece like in the 32-bit version.
static void blend_tiled_generic_rgb64(int count, const QSpan *spans, void *userData)
{
    QSpanData *data = reinterpret_cast<QSpanData *>(userData);

    Operator op = getOperator(data, spans, count);
    if (!op.func64) {
        qCDebug(lcQtGuiDrawHelper, "blend_tiled_generic_rgb64: unsupported 64-bit blend attempted, falling back to 32-bit");
        return blend_tiled_generic(count, spans, userData);
    }
    alignas(8) QRgba64 buffer[BufferSize];
    alignas(8) QRgba64 src_buffer[BufferSize];

    const int image_width = data->texture.width;
    const int image_height = data->texture.height;
    // Texture offset in whole pixels, wrapped into the tile below.
    int xoff = -qRound(-data->dx) % image_width;
    int yoff = -qRound(-data->dy) % image_height;

    if (xoff < 0)
        xoff += image_width;
    if (yoff < 0)
        yoff += image_height;

    bool isBpp32 = qPixelLayouts[data->rasterBuffer->format].bpp == QPixelLayout::BPP32;
    bool isBpp64 = qPixelLayouts[data->rasterBuffer->format].bpp == QPixelLayout::BPP64;
    if (op.destFetch64 == destFetch64Undefined && image_width <= BufferSize && (isBpp32 || isBpp64)) {
        // If destination isn't blended into the result, we can do the tiling directly on destination pixels.
        while (count--) {
            int x = spans->x;
            int y = spans->y;
            int length = spans->len;
            // Texture coordinates of the first pixel, wrapped into the tile.
            int sx = (xoff + spans->x) % image_width;
            int sy = (spans->y + yoff) % image_height;
            if (sx < 0)
                sx += image_width;
            if (sy < 0)
                sy += image_height;

            // sl: number of pixels to produce from the texture, at most one
            // full tile width.
            int sl = qMin(image_width, length);
            if (sx > 0 && sl > 0) {
                // The span starts inside a tile: store the part up to the
                // right edge of the tile first.
                int l = qMin(image_width - sx, sl);
                const QRgba64 *src = op.srcFetch64(src_buffer, &op, data, sy, sx, l);
                op.destStore64(data->rasterBuffer, x, y, src, l);
                x += l;
                sx += l;
                sl -= l;
                if (sx >= image_width)
                    sx = 0;
            }
            if (sl > 0) {
                // Store what is left of the first period, starting at the
                // left edge of the tile.
                Q_ASSERT(sx == 0);
                const QRgba64 *src = op.srcFetch64(src_buffer, &op, data, sy, sx, sl);
                op.destStore64(data->rasterBuffer, x, y, src, sl);
                x += sl;
                sx += sl;
                sl -= sl;
                if (sx >= image_width)
                    sx = 0;
            }
            // Fill the remainder of the span by copying each pixel from the
            // one image_width pixels to its left. dest is positioned so that
            // index image_width is the first pixel not written yet; the loops
            // do not run when the span is no longer than one tile.
            if (isBpp32) {
                uint *dest = reinterpret_cast<uint *>(data->rasterBuffer->scanLine(y)) + x - image_width;
                for (int i = image_width; i < length; ++i)
                    dest[i] = dest[i - image_width];
            } else {
                quint64 *dest = reinterpret_cast<quint64 *>(data->rasterBuffer->scanLine(y)) + x - image_width;
                for (int i = image_width; i < length; ++i)
                    dest[i] = dest[i - image_width];
            }
            ++spans;
        }
        return;
    }

    // Regular path: blend each span in pieces that end at the right edge of
    // a tile or after BufferSize pixels, whichever comes first.
    while (count--) {
        int x = spans->x;
        int length = spans->len;
        int sx = (xoff + spans->x) % image_width;
        int sy = (spans->y + yoff) % image_height;
        if (sx < 0)
            sx += image_width;
        if (sy < 0)
            sy += image_height;

        const int coverage = (spans->coverage * data->texture.const_alpha) >> 8;
        while (length) {
            int l = qMin(image_width - sx, length);
            if (BufferSize < l)
                l = BufferSize;
            const QRgba64 *src = op.srcFetch64(src_buffer, &op, data, sy, sx, l);
            QRgba64 *dest = op.destFetch64(buffer, data->rasterBuffer, x, spans->y, l);
            op.func64(dest, src, l, coverage);
            if (op.destStore64)
                op.destStore64(data->rasterBuffer, x, spans->y, dest, l);
            x += l;
            sx += l;
            length -= l;
            // wrap around to the left edge of the tile
            if (sx >= image_width)
                sx = 0;
        }
        ++spans;
    }
}
5220 #endif
5221 
blend_tiled_argb(int count,const QSpan * spans,void * userData)5222 static void blend_tiled_argb(int count, const QSpan *spans, void *userData)
5223 {
5224     QSpanData *data = reinterpret_cast<QSpanData *>(userData);
5225     if (data->texture.format != QImage::Format_ARGB32_Premultiplied
5226         && data->texture.format != QImage::Format_RGB32) {
5227         blend_tiled_generic(count, spans, userData);
5228         return;
5229     }
5230 
5231     Operator op = getOperator(data, spans, count);
5232 
5233     int image_width = data->texture.width;
5234     int image_height = data->texture.height;
5235     int xoff = -qRound(-data->dx) % image_width;
5236     int yoff = -qRound(-data->dy) % image_height;
5237 
5238     if (xoff < 0)
5239         xoff += image_width;
5240     if (yoff < 0)
5241         yoff += image_height;
5242 
5243     while (count--) {
5244         int x = spans->x;
5245         int length = spans->len;
5246         int sx = (xoff + spans->x) % image_width;
5247         int sy = (spans->y + yoff) % image_height;
5248         if (sx < 0)
5249             sx += image_width;
5250         if (sy < 0)
5251             sy += image_height;
5252 
5253         const int coverage = (spans->coverage * data->texture.const_alpha) >> 8;
5254         while (length) {
5255             int l = qMin(image_width - sx, length);
5256             if (BufferSize < l)
5257                 l = BufferSize;
5258             const uint *src = (const uint *)data->texture.scanLine(sy) + sx;
5259             uint *dest = ((uint *)data->rasterBuffer->scanLine(spans->y)) + x;
5260             op.func(dest, src, l, coverage);
5261             x += l;
5262             sx += l;
5263             length -= l;
5264             if (sx >= image_width)
5265                 sx = 0;
5266         }
5267         ++spans;
5268     }
5269 }
5270 
blend_tiled_rgb565(int count,const QSpan * spans,void * userData)5271 static void blend_tiled_rgb565(int count, const QSpan *spans, void *userData)
5272 {
5273     QSpanData *data = reinterpret_cast<QSpanData*>(userData);
5274     QPainter::CompositionMode mode = data->rasterBuffer->compositionMode;
5275 
5276     if (data->texture.format != QImage::Format_RGB16
5277             || (mode != QPainter::CompositionMode_SourceOver
5278                 && mode != QPainter::CompositionMode_Source))
5279     {
5280         blend_tiled_generic(count, spans, userData);
5281         return;
5282     }
5283 
5284     const int image_width = data->texture.width;
5285     const int image_height = data->texture.height;
5286     int xoff = -qRound(-data->dx) % image_width;
5287     int yoff = -qRound(-data->dy) % image_height;
5288 
5289     if (xoff < 0)
5290         xoff += image_width;
5291     if (yoff < 0)
5292         yoff += image_height;
5293 
5294     while (count--) {
5295         const quint8 coverage = (data->texture.const_alpha * spans->coverage) >> 8;
5296         if (coverage == 0) {
5297             ++spans;
5298             continue;
5299         }
5300 
5301         int x = spans->x;
5302         int length = spans->len;
5303         int sx = (xoff + spans->x) % image_width;
5304         int sy = (spans->y + yoff) % image_height;
5305         if (sx < 0)
5306             sx += image_width;
5307         if (sy < 0)
5308             sy += image_height;
5309 
5310         if (coverage == 255) {
5311             // Copy the first texture block
5312             length = qMin(image_width,length);
5313             int tx = x;
5314             while (length) {
5315                 int l = qMin(image_width - sx, length);
5316                 if (BufferSize < l)
5317                     l = BufferSize;
5318                 quint16 *dest = ((quint16 *)data->rasterBuffer->scanLine(spans->y)) + tx;
5319                 const quint16 *src = (const quint16 *)data->texture.scanLine(sy) + sx;
5320                 memcpy(dest, src, l * sizeof(quint16));
5321                 length -= l;
5322                 tx += l;
5323                 sx += l;
5324                 if (sx >= image_width)
5325                     sx = 0;
5326             }
5327 
5328             // Now use the rasterBuffer as the source of the texture,
5329             // We can now progressively copy larger blocks
5330             // - Less cpu time in code figuring out what to copy
5331             // We are dealing with one block of data
5332             // - More likely to fit in the cache
5333             // - can use memcpy
5334             int copy_image_width = qMin(image_width, int(spans->len));
5335             length = spans->len - copy_image_width;
5336             quint16 *src = ((quint16 *)data->rasterBuffer->scanLine(spans->y)) + x;
5337             quint16 *dest = src + copy_image_width;
5338             while (copy_image_width < length) {
5339                 memcpy(dest, src, copy_image_width * sizeof(quint16));
5340                 dest += copy_image_width;
5341                 length -= copy_image_width;
5342                 copy_image_width *= 2;
5343             }
5344             if (length > 0)
5345                 memcpy(dest, src, length * sizeof(quint16));
5346         } else {
5347             const quint8 alpha = (coverage + 1) >> 3;
5348             const quint8 ialpha = 0x20 - alpha;
5349             if (alpha > 0) {
5350                 while (length) {
5351                     int l = qMin(image_width - sx, length);
5352                     if (BufferSize < l)
5353                         l = BufferSize;
5354                     quint16 *dest = ((quint16 *)data->rasterBuffer->scanLine(spans->y)) + x;
5355                     const quint16 *src = (const quint16 *)data->texture.scanLine(sy) + sx;
5356                     blend_sourceOver_rgb16_rgb16(dest, src, l, alpha, ialpha);
5357                     x += l;
5358                     sx += l;
5359                     length -= l;
5360                     if (sx >= image_width)
5361                         sx = 0;
5362                 }
5363             }
5364         }
5365         ++spans;
5366     }
5367 }
5368 
5369 /* Image formats here are target formats */
// Texture span functions that qBlendTexture() uses when the raster buffer
// format is Format_ARGB32_Premultiplied. The table is indexed by the
// TextureBlendType value returned by getBlendType(); the trailing comment
// on each entry names the blend type it serves.
static const ProcessSpans processTextureSpansARGB32PM[NBlendTypes] = {
    blend_untransformed_argb,           // Untransformed
    blend_tiled_argb,                   // Tiled
    blend_src_generic,                  // Transformed
    blend_src_generic,                  // TransformedTiled
    blend_src_generic,                  // TransformedBilinear
    blend_src_generic                   // TransformedBilinearTiled
};
5378 
// Texture span functions that qBlendTexture() uses when the raster buffer
// format is Format_RGB16. The table is indexed by the TextureBlendType
// value returned by getBlendType(); the trailing comment on each entry
// names the blend type it serves.
static const ProcessSpans processTextureSpansRGB16[NBlendTypes] = {
    blend_untransformed_rgb565,         // Untransformed
    blend_tiled_rgb565,                 // Tiled
    blend_src_generic,                  // Transformed
    blend_src_generic,                  // TransformedTiled
    blend_src_generic,                  // TransformedBilinear
    blend_src_generic                   // TransformedBilinearTiled
};
5387 
// Fallback texture span functions: qBlendTexture() uses this table for
// every raster buffer format that has no dedicated case in its switch.
// The table is indexed by the TextureBlendType value returned by
// getBlendType(); the trailing comment on each entry names the blend type.
static const ProcessSpans processTextureSpansGeneric[NBlendTypes] = {
    blend_untransformed_generic,        // Untransformed
    blend_tiled_generic,                // Tiled
    blend_src_generic,                  // Transformed
    blend_src_generic,                  // TransformedTiled
    blend_src_generic,                  // TransformedBilinear
    blend_src_generic                   // TransformedBilinearTiled
};
5396 
5397 #if QT_CONFIG(raster_64bit)
// Texture span functions working on 64-bit (QRgba64) pixels. Only built
// when the raster_64bit feature is enabled; qBlendTexture() uses this
// table for the 30-bit and 64-bit formats and Format_Grayscale16, plus
// Format_ARGB32 and Format_RGBA8888 on the platforms selected by the
// preprocessor check there. Indexed by TextureBlendType.
static const ProcessSpans processTextureSpansGeneric64[NBlendTypes] = {
    blend_untransformed_generic_rgb64,  // Untransformed
    blend_tiled_generic_rgb64,          // Tiled
    blend_src_generic_rgb64,            // Transformed
    blend_src_generic_rgb64,            // TransformedTiled
    blend_src_generic_rgb64,            // TransformedBilinear
    blend_src_generic_rgb64             // TransformedBilinearTiled
};
5406 #endif
5407 
qBlendTexture(int count,const QSpan * spans,void * userData)5408 void qBlendTexture(int count, const QSpan *spans, void *userData)
5409 {
5410     QSpanData *data = reinterpret_cast<QSpanData *>(userData);
5411     TextureBlendType blendType = getBlendType(data);
5412     ProcessSpans proc;
5413     switch (data->rasterBuffer->format) {
5414     case QImage::Format_ARGB32_Premultiplied:
5415         proc = processTextureSpansARGB32PM[blendType];
5416         break;
5417     case QImage::Format_RGB16:
5418         proc = processTextureSpansRGB16[blendType];
5419         break;
5420 #if QT_CONFIG(raster_64bit)
5421 #if defined(__SSE2__) || defined(__ARM_NEON__) || (Q_PROCESSOR_WORDSIZE == 8)
5422     case QImage::Format_ARGB32:
5423     case QImage::Format_RGBA8888:
5424 #endif
5425     case QImage::Format_BGR30:
5426     case QImage::Format_A2BGR30_Premultiplied:
5427     case QImage::Format_RGB30:
5428     case QImage::Format_A2RGB30_Premultiplied:
5429     case QImage::Format_RGBX64:
5430     case QImage::Format_RGBA64:
5431     case QImage::Format_RGBA64_Premultiplied:
5432     case QImage::Format_Grayscale16:
5433         proc = processTextureSpansGeneric64[blendType];
5434         break;
5435 #endif // QT_CONFIG(raster_64bit)
5436     case QImage::Format_Invalid:
5437         Q_UNREACHABLE();
5438         return;
5439     default:
5440         proc = processTextureSpansGeneric[blendType];
5441         break;
5442     }
5443     proc(count, spans, userData);
5444 }
5445 
blend_vertical_gradient_argb(int count,const QSpan * spans,void * userData)5446 static void blend_vertical_gradient_argb(int count, const QSpan *spans, void *userData)
5447 {
5448     QSpanData *data = reinterpret_cast<QSpanData *>(userData);
5449 
5450     LinearGradientValues linear;
5451     getLinearGradientValues(&linear, data);
5452 
5453     CompositionFunctionSolid funcSolid =
5454         functionForModeSolid[data->rasterBuffer->compositionMode];
5455 
5456     /*
5457         The logic for vertical gradient calculations is a mathematically
5458         reduced copy of that in fetchLinearGradient() - which is basically:
5459 
5460             qreal ry = data->m22 * (y + 0.5) + data->dy;
5461             qreal t = linear.dy*ry + linear.off;
5462             t *= (GRADIENT_STOPTABLE_SIZE - 1);
5463             quint32 color =
5464                 qt_gradient_pixel_fixed(&data->gradient,
5465                                         int(t * FIXPT_SIZE));
5466 
5467         This has then been converted to fixed point to improve performance.
5468      */
5469     const int gss = GRADIENT_STOPTABLE_SIZE - 1;
5470     int yinc = int((linear.dy * data->m22 * gss) * FIXPT_SIZE);
5471     int off = int((((linear.dy * (data->m22 * qreal(0.5) + data->dy) + linear.off) * gss) * FIXPT_SIZE));
5472 
5473     while (count--) {
5474         int y = spans->y;
5475         int x = spans->x;
5476 
5477         quint32 *dst = (quint32 *)(data->rasterBuffer->scanLine(y)) + x;
5478         quint32 color =
5479             qt_gradient_pixel_fixed(&data->gradient, yinc * y + off);
5480 
5481         funcSolid(dst, spans->len, color, spans->coverage);
5482         ++spans;
5483     }
5484 }
5485 
5486 template<ProcessSpans blend_color>
blend_vertical_gradient(int count,const QSpan * spans,void * userData)5487 static void blend_vertical_gradient(int count, const QSpan *spans, void *userData)
5488 {
5489     QSpanData *data = reinterpret_cast<QSpanData *>(userData);
5490 
5491     LinearGradientValues linear;
5492     getLinearGradientValues(&linear, data);
5493 
5494     // Based on the same logic as blend_vertical_gradient_argb.
5495 
5496     const int gss = GRADIENT_STOPTABLE_SIZE - 1;
5497     int yinc = int((linear.dy * data->m22 * gss) * FIXPT_SIZE);
5498     int off = int((((linear.dy * (data->m22 * qreal(0.5) + data->dy) + linear.off) * gss) * FIXPT_SIZE));
5499 
5500     while (count--) {
5501         int y = spans->y;
5502 
5503 #if QT_CONFIG(raster_64bit)
5504         data->solidColor = qt_gradient_pixel64_fixed(&data->gradient, yinc * y + off);
5505 #else
5506         data->solidColor = QRgba64::fromArgb32(qt_gradient_pixel_fixed(&data->gradient, yinc * y + off));
5507 #endif
5508         blend_color(1, spans, userData);
5509         ++spans;
5510     }
5511 }
5512 
qBlendGradient(int count,const QSpan * spans,void * userData)5513 void qBlendGradient(int count, const QSpan *spans, void *userData)
5514 {
5515     QSpanData *data = reinterpret_cast<QSpanData *>(userData);
5516     bool isVerticalGradient =
5517         data->txop <= QTransform::TxScale &&
5518         data->type == QSpanData::LinearGradient &&
5519         data->gradient.linear.end.x == data->gradient.linear.origin.x;
5520     switch (data->rasterBuffer->format) {
5521     case QImage::Format_RGB16:
5522         if (isVerticalGradient)
5523             return blend_vertical_gradient<blend_color_rgb16>(count, spans, userData);
5524         return blend_src_generic(count, spans, userData);
5525     case QImage::Format_RGB32:
5526     case QImage::Format_ARGB32_Premultiplied:
5527         if (isVerticalGradient)
5528             return blend_vertical_gradient_argb(count, spans, userData);
5529         return blend_src_generic(count, spans, userData);
5530 #if QT_CONFIG(raster_64bit)
5531 #if defined(__SSE2__) || defined(__ARM_NEON__) || (Q_PROCESSOR_WORDSIZE == 8)
5532     case QImage::Format_ARGB32:
5533     case QImage::Format_RGBA8888:
5534 #endif
5535     case QImage::Format_BGR30:
5536     case QImage::Format_A2BGR30_Premultiplied:
5537     case QImage::Format_RGB30:
5538     case QImage::Format_A2RGB30_Premultiplied:
5539     case QImage::Format_RGBX64:
5540     case QImage::Format_RGBA64:
5541     case QImage::Format_RGBA64_Premultiplied:
5542         if (isVerticalGradient)
5543             return blend_vertical_gradient<blend_color_generic_rgb64>(count, spans, userData);
5544         return blend_src_generic_rgb64(count, spans, userData);
5545 #endif // QT_CONFIG(raster_64bit)
5546     case QImage::Format_Invalid:
5547         break;
5548     default:
5549         if (isVerticalGradient)
5550             return blend_vertical_gradient<blend_color_generic>(count, spans, userData);
5551         return blend_src_generic(count, spans, userData);
5552     }
5553     Q_UNREACHABLE();
5554 }
5555 
/*
    Paints a 1-bit-per-pixel bitmap onto \a rasterBuffer with a solid color.

    \a map holds \a mapHeight rows that are \a mapStride bytes apart. Bits
    are examined most significant bit first (mask 0x80). For every run of
    consecutive set bits the corresponding destination pixels, starting at
    column \a x of scan line \a y, are filled with \a color through
    qt_memfill(); destination pixels under clear bits are not written.

    DST is the pixel type of the raster buffer, and \a color is stored as is.

    No clipping is done here; the caller presumably guarantees that the
    bitmap lies inside the raster buffer.
*/
template <class DST> static
inline void qt_bitmapblit_template(QRasterBuffer *rasterBuffer,
                                   int x, int y, DST color,
                                   const uchar *map,
                                   int mapWidth, int mapHeight, int mapStride)
{
    DST *dest = reinterpret_cast<DST *>(rasterBuffer->scanLine(y)) + x;
    const int destStride = rasterBuffer->stride<DST>();

    if (mapWidth > 8) {
        // Rows wider than one byte.
        while (mapHeight--) {
            // x0: destination column (relative to dest) where the pending
            //     run starts, or of the current bit when no run is pending.
            // n:  length of the pending run of set bits. Both are carried
            //     across byte boundaries, so a run may span several bytes.
            int x0 = 0;
            int n = 0;
            // Note: this 'x' shadows the function parameter. All eight bits
            // of every byte are examined, including the last byte when
            // mapWidth is not a multiple of 8 (presumably the padding bits
            // are expected to be clear).
            for (int x = 0; x < mapWidth; x += 8) {
                uchar s = map[x >> 3];
                for (int i = 0; i < 8; ++i) {
                    if (s & 0x80) {
                        ++n;
                    } else {
                        if (n) {
                            // A clear bit ends the run: paint it, then move
                            // past the run and past this clear bit.
                            qt_memfill(dest + x0, color, n);
                            x0 += n + 1;
                            n = 0;
                        } else {
                            ++x0;
                        }
                        if (!s) {
                            // No set bits left in this byte: skip the bits
                            // that have not been looked at yet.
                            x0 += 8 - 1 - i;
                            break;
                        }
                    }
                    s <<= 1;
                }
            }
            // Flush a run that reaches the end of the row.
            if (n)
                qt_memfill(dest + x0, color, n);
            dest += destStride;
            map += mapStride;
        }
    } else {
        // Narrow rows: only the first byte of each row is read.
        while (mapHeight--) {
            int x0 = 0;
            int n = 0;
            // s is a uchar, so shifting left drops the bits already
            // handled; the loop stops once no set bits remain.
            for (uchar s = *map; s; s <<= 1) {
                if (s & 0x80) {
                    ++n;
                } else if (n) {
                    qt_memfill(dest + x0, color, n);
                    x0 += n + 1;
                    n = 0;
                } else {
                    ++x0;
                }
            }
            // Flush the run that was still open when the byte ran out.
            if (n)
                qt_memfill(dest + x0, color, n);
            dest += destStride;
            map += mapStride;
        }
    }
}
5617 
qt_bitmapblit_argb32(QRasterBuffer * rasterBuffer,int x,int y,const QRgba64 & color,const uchar * map,int mapWidth,int mapHeight,int mapStride)5618 inline static void qt_bitmapblit_argb32(QRasterBuffer *rasterBuffer,
5619                                    int x, int y, const QRgba64 &color,
5620                                    const uchar *map,
5621                                    int mapWidth, int mapHeight, int mapStride)
5622 {
5623     qt_bitmapblit_template<quint32>(rasterBuffer, x,  y, color.toArgb32(),
5624                                     map, mapWidth, mapHeight, mapStride);
5625 }
5626 
qt_bitmapblit_rgba8888(QRasterBuffer * rasterBuffer,int x,int y,const QRgba64 & color,const uchar * map,int mapWidth,int mapHeight,int mapStride)5627 inline static void qt_bitmapblit_rgba8888(QRasterBuffer *rasterBuffer,
5628                                    int x, int y, const QRgba64 &color,
5629                                    const uchar *map,
5630                                    int mapWidth, int mapHeight, int mapStride)
5631 {
5632     qt_bitmapblit_template<quint32>(rasterBuffer, x, y, ARGB2RGBA(color.toArgb32()),
5633                                     map, mapWidth, mapHeight, mapStride);
5634 }
5635 
5636 template<QtPixelOrder PixelOrder>
qt_bitmapblit_rgb30(QRasterBuffer * rasterBuffer,int x,int y,const QRgba64 & color,const uchar * map,int mapWidth,int mapHeight,int mapStride)5637 inline static void qt_bitmapblit_rgb30(QRasterBuffer *rasterBuffer,
5638                                    int x, int y, const QRgba64 &color,
5639                                    const uchar *map,
5640                                    int mapWidth, int mapHeight, int mapStride)
5641 {
5642     qt_bitmapblit_template<quint32>(rasterBuffer, x, y, qConvertRgb64ToRgb30<PixelOrder>(color),
5643                                     map, mapWidth, mapHeight, mapStride);
5644 }
5645 
qt_bitmapblit_quint16(QRasterBuffer * rasterBuffer,int x,int y,const QRgba64 & color,const uchar * map,int mapWidth,int mapHeight,int mapStride)5646 inline static void qt_bitmapblit_quint16(QRasterBuffer *rasterBuffer,
5647                                    int x, int y, const QRgba64 &color,
5648                                    const uchar *map,
5649                                    int mapWidth, int mapHeight, int mapStride)
5650 {
5651     qt_bitmapblit_template<quint16>(rasterBuffer, x,  y, color.toRgb16(),
5652                                     map, mapWidth, mapHeight, mapStride);
5653 }
5654 
grayBlendPixel(quint32 * dst,int coverage,QRgba64 srcLinear,const QColorTrcLut * colorProfile)5655 static inline void grayBlendPixel(quint32 *dst, int coverage, QRgba64 srcLinear, const QColorTrcLut *colorProfile)
5656 {
5657     // Do a gammacorrected gray alphablend...
5658     const QRgba64 dstLinear = colorProfile ? colorProfile->toLinear64(*dst) : QRgba64::fromArgb32(*dst);
5659 
5660     QRgba64 blend = interpolate255(srcLinear, coverage, dstLinear, 255 - coverage);
5661 
5662     *dst = colorProfile ? colorProfile->fromLinear64(blend) : toArgb32(blend);
5663 }
5664 
alphamapblend_argb32(quint32 * dst,int coverage,QRgba64 srcLinear,quint32 src,const QColorTrcLut * colorProfile)5665 static inline void alphamapblend_argb32(quint32 *dst, int coverage, QRgba64 srcLinear, quint32 src, const QColorTrcLut *colorProfile)
5666 {
5667     if (coverage == 0) {
5668         // nothing
5669     } else if (coverage == 255 || !colorProfile) {
5670         blend_pixel(*dst, src, coverage);
5671     } else if (*dst < 0xff000000) {
5672         // Give up and do a naive gray alphablend. Needed to deal with ARGB32 and invalid ARGB32_premultiplied, see QTBUG-60571
5673         blend_pixel(*dst, src, coverage);
5674     } else if (src >= 0xff000000) {
5675         grayBlendPixel(dst, coverage, srcLinear, colorProfile);
5676     } else {
5677         // First do naive blend with text-color
5678         QRgb s = *dst;
5679         blend_pixel(s, src);
5680         // Then gamma-corrected blend with glyph shape
5681         QRgba64 s64 = colorProfile ? colorProfile->toLinear64(s) : QRgba64::fromArgb32(s);
5682         grayBlendPixel(dst, coverage, s64, colorProfile);
5683     }
5684 }
5685 
5686 #if QT_CONFIG(raster_64bit)
5687 
grayBlendPixel(QRgba64 & dst,int coverage,QRgba64 srcLinear,const QColorTrcLut * colorProfile)5688 static inline void grayBlendPixel(QRgba64 &dst, int coverage, QRgba64 srcLinear, const QColorTrcLut *colorProfile)
5689 {
5690     // Do a gammacorrected gray alphablend...
5691     QRgba64 dstColor = dst;
5692     if (colorProfile) {
5693         if (dstColor.isOpaque())
5694             dstColor = colorProfile->toLinear(dstColor);
5695         else if (!dstColor.isTransparent())
5696             dstColor = colorProfile->toLinear(dstColor.unpremultiplied()).premultiplied();
5697     }
5698 
5699     blend_pixel(dstColor, srcLinear, coverage);
5700 
5701     if (colorProfile) {
5702         if (dstColor.isOpaque())
5703             dstColor = colorProfile->fromLinear(dstColor);
5704         else if (!dstColor.isTransparent())
5705             dstColor = colorProfile->fromLinear(dstColor.unpremultiplied()).premultiplied();
5706     }
5707     dst = dstColor;
5708 }
5709 
alphamapblend_generic(int coverage,QRgba64 * dest,int x,const QRgba64 & srcLinear,const QRgba64 & src,const QColorTrcLut * colorProfile)5710 static inline void alphamapblend_generic(int coverage, QRgba64 *dest, int x, const QRgba64 &srcLinear, const QRgba64 &src, const QColorTrcLut *colorProfile)
5711 {
5712     if (coverage == 0) {
5713         // nothing
5714     } else if (coverage == 255) {
5715         blend_pixel(dest[x], src);
5716     } else if (src.isOpaque()) {
5717         grayBlendPixel(dest[x], coverage, srcLinear, colorProfile);
5718     } else {
5719         // First do naive blend with text-color
5720         QRgba64 s = dest[x];
5721         blend_pixel(s, src);
5722         // Then gamma-corrected blend with glyph shape
5723         if (colorProfile)
5724             s = colorProfile->toLinear(s);
5725         grayBlendPixel(dest[x], coverage, s, colorProfile);
5726     }
5727 }
5728 
qt_alphamapblit_generic(QRasterBuffer * rasterBuffer,int x,int y,const QRgba64 & color,const uchar * map,int mapWidth,int mapHeight,int mapStride,const QClipData * clip,bool useGammaCorrection)5729 static void qt_alphamapblit_generic(QRasterBuffer *rasterBuffer,
5730                                     int x, int y, const QRgba64 &color,
5731                                     const uchar *map,
5732                                     int mapWidth, int mapHeight, int mapStride,
5733                                     const QClipData *clip, bool useGammaCorrection)
5734 {
5735     if (color.isTransparent())
5736         return;
5737 
5738     const QColorTrcLut *colorProfile = nullptr;
5739 
5740     if (useGammaCorrection)
5741         colorProfile = QGuiApplicationPrivate::instance()->colorProfileForA8Text();
5742 
5743     QRgba64 srcColor = color;
5744     if (colorProfile && color.isOpaque())
5745         srcColor = colorProfile->toLinear(srcColor);
5746 
5747     alignas(8) QRgba64 buffer[BufferSize];
5748     const DestFetchProc64 destFetch64 = destFetchProc64[rasterBuffer->format];
5749     const DestStoreProc64 destStore64 = destStoreProc64[rasterBuffer->format];
5750 
5751     if (!clip) {
5752         for (int ly = 0; ly < mapHeight; ++ly) {
5753             int i = x;
5754             int length = mapWidth;
5755             while (length > 0) {
5756                 int l = qMin(BufferSize, length);
5757                 QRgba64 *dest = destFetch64(buffer, rasterBuffer, i, y + ly, l);
5758                 for (int j=0; j < l; ++j) {
5759                     const int coverage = map[j + (i - x)];
5760                     alphamapblend_generic(coverage, dest, j, srcColor, color, colorProfile);
5761                 }
5762                 if (destStore64)
5763                     destStore64(rasterBuffer, i, y + ly, dest, l);
5764                 length -= l;
5765                 i += l;
5766             }
5767             map += mapStride;
5768         }
5769     } else {
5770         int bottom = qMin(y + mapHeight, rasterBuffer->height());
5771 
5772         int top = qMax(y, 0);
5773         map += (top - y) * mapStride;
5774 
5775         const_cast<QClipData *>(clip)->initialize();
5776         for (int yp = top; yp<bottom; ++yp) {
5777             const QClipData::ClipLine &line = clip->m_clipLines[yp];
5778 
5779             for (int i=0; i<line.count; ++i) {
5780                 const QSpan &clip = line.spans[i];
5781 
5782                 int start = qMax<int>(x, clip.x);
5783                 int end = qMin<int>(x + mapWidth, clip.x + clip.len);
5784                 if (end <= start)
5785                     continue;
5786                 Q_ASSERT(end - start <= BufferSize);
5787                 QRgba64 *dest = destFetch64(buffer, rasterBuffer, start, clip.y, end - start);
5788 
5789                 for (int xp=start; xp<end; ++xp) {
5790                     const int coverage = map[xp - x];
5791                     alphamapblend_generic(coverage, dest, xp - start, srcColor, color, colorProfile);
5792                 }
5793                 if (destStore64)
5794                     destStore64(rasterBuffer, start, clip.y, dest, end - start);
5795             } // for (i -> line.count)
5796             map += mapStride;
5797         } // for (yp -> bottom)
5798     }
5799 }
5800 #else
qt_alphamapblit_generic(QRasterBuffer * rasterBuffer,int x,int y,const QRgba64 & color,const uchar * map,int mapWidth,int mapHeight,int mapStride,const QClipData * clip,bool useGammaCorrection)5801 static void qt_alphamapblit_generic(QRasterBuffer *rasterBuffer,
5802                                     int x, int y, const QRgba64 &color,
5803                                     const uchar *map,
5804                                     int mapWidth, int mapHeight, int mapStride,
5805                                     const QClipData *clip, bool useGammaCorrection)
5806 {
5807     if (color.isTransparent())
5808         return;
5809 
5810     const quint32 c = color.toArgb32();
5811 
5812     const QColorTrcLut *colorProfile = nullptr;
5813 
5814     if (useGammaCorrection)
5815         colorProfile = QGuiApplicationPrivate::instance()->colorProfileForA8Text();
5816 
5817     QRgba64 srcColor = color;
5818     if (colorProfile && color.isOpaque())
5819         srcColor = colorProfile->toLinear(srcColor);
5820 
5821     quint32 buffer[BufferSize];
5822     const DestFetchProc destFetch = destFetchProc[rasterBuffer->format];
5823     const DestStoreProc destStore = destStoreProc[rasterBuffer->format];
5824 
5825     if (!clip) {
5826         for (int ly = 0; ly < mapHeight; ++ly) {
5827             int i = x;
5828             int length = mapWidth;
5829             while (length > 0) {
5830                 int l = qMin(BufferSize, length);
5831                 quint32 *dest = destFetch(buffer, rasterBuffer, i, y + ly, l);
5832                 for (int j=0; j < l; ++j) {
5833                     const int coverage = map[j + (i - x)];
5834                     alphamapblend_argb32(dest + j, coverage, srcColor, c, colorProfile);
5835                 }
5836                 if (destStore)
5837                     destStore(rasterBuffer, i, y + ly, dest, l);
5838                 length -= l;
5839                 i += l;
5840             }
5841             map += mapStride;
5842         }
5843     } else {
5844         int bottom = qMin(y + mapHeight, rasterBuffer->height());
5845 
5846         int top = qMax(y, 0);
5847         map += (top - y) * mapStride;
5848 
5849         const_cast<QClipData *>(clip)->initialize();
5850         for (int yp = top; yp<bottom; ++yp) {
5851             const QClipData::ClipLine &line = clip->m_clipLines[yp];
5852 
5853             for (int i=0; i<line.count; ++i) {
5854                 const QSpan &clip = line.spans[i];
5855 
5856                 int start = qMax<int>(x, clip.x);
5857                 int end = qMin<int>(x + mapWidth, clip.x + clip.len);
5858                 if (end <= start)
5859                     continue;
5860                 Q_ASSERT(end - start <= BufferSize);
5861                 quint32 *dest = destFetch(buffer, rasterBuffer, start, clip.y, end - start);
5862 
5863                 for (int xp=start; xp<end; ++xp) {
5864                     const int coverage = map[xp - x];
5865                     alphamapblend_argb32(dest + xp - x, coverage, srcColor, color, colorProfile);
5866                 }
5867                 if (destStore)
5868                     destStore(rasterBuffer, start, clip.y, dest, end - start);
5869             } // for (i -> line.count)
5870             map += mapStride;
5871         } // for (yp -> bottom)
5872     }
5873 }
5874 #endif
5875 
alphamapblend_quint16(int coverage,quint16 * dest,int x,const quint16 srcColor)5876 static inline void alphamapblend_quint16(int coverage, quint16 *dest, int x, const quint16 srcColor)
5877 {
5878     if (coverage == 0) {
5879         // nothing
5880     } else if (coverage == 255) {
5881         dest[x] = srcColor;
5882     } else {
5883         dest[x] = BYTE_MUL_RGB16(srcColor, coverage)
5884                 + BYTE_MUL_RGB16(dest[x], 255 - coverage);
5885     }
5886 }
5887 
qt_alphamapblit_quint16(QRasterBuffer * rasterBuffer,int x,int y,const QRgba64 & color,const uchar * map,int mapWidth,int mapHeight,int mapStride,const QClipData * clip,bool useGammaCorrection)5888 void qt_alphamapblit_quint16(QRasterBuffer *rasterBuffer,
5889                              int x, int y, const QRgba64 &color,
5890                              const uchar *map,
5891                              int mapWidth, int mapHeight, int mapStride,
5892                              const QClipData *clip, bool useGammaCorrection)
5893 {
5894     if (useGammaCorrection || !color.isOpaque()) {
5895         qt_alphamapblit_generic(rasterBuffer, x, y, color, map, mapWidth, mapHeight, mapStride, clip, useGammaCorrection);
5896         return;
5897     }
5898 
5899     const quint16 c = color.toRgb16();
5900 
5901     if (!clip) {
5902         quint16 *dest = reinterpret_cast<quint16*>(rasterBuffer->scanLine(y)) + x;
5903         const int destStride = rasterBuffer->stride<quint16>();
5904         while (mapHeight--) {
5905             for (int i = 0; i < mapWidth; ++i)
5906                 alphamapblend_quint16(map[i], dest, i, c);
5907             dest += destStride;
5908             map += mapStride;
5909         }
5910     } else {
5911         int top = qMax(y, 0);
5912         int bottom = qMin(y + mapHeight, rasterBuffer->height());
5913         map += (top - y) * mapStride;
5914 
5915         const_cast<QClipData *>(clip)->initialize();
5916         for (int yp = top; yp<bottom; ++yp) {
5917             const QClipData::ClipLine &line = clip->m_clipLines[yp];
5918 
5919             quint16 *dest = reinterpret_cast<quint16*>(rasterBuffer->scanLine(yp));
5920 
5921             for (int i=0; i<line.count; ++i) {
5922                 const QSpan &clip = line.spans[i];
5923 
5924                 int start = qMax<int>(x, clip.x);
5925                 int end = qMin<int>(x + mapWidth, clip.x + clip.len);
5926 
5927                 for (int xp=start; xp<end; ++xp)
5928                     alphamapblend_quint16(map[xp - x], dest, xp, c);
5929             } // for (i -> line.count)
5930             map += mapStride;
5931         } // for (yp -> bottom)
5932     }
5933 }
5934 
qt_alphamapblit_argb32(QRasterBuffer * rasterBuffer,int x,int y,const QRgba64 & color,const uchar * map,int mapWidth,int mapHeight,int mapStride,const QClipData * clip,bool useGammaCorrection)5935 static void qt_alphamapblit_argb32(QRasterBuffer *rasterBuffer,
5936                                    int x, int y, const QRgba64 &color,
5937                                    const uchar *map,
5938                                    int mapWidth, int mapHeight, int mapStride,
5939                                    const QClipData *clip, bool useGammaCorrection)
5940 {
5941     const quint32 c = color.toArgb32();
5942     const int destStride = rasterBuffer->stride<quint32>();
5943 
5944     if (color.isTransparent())
5945         return;
5946 
5947     const QColorTrcLut *colorProfile = nullptr;
5948 
5949     if (useGammaCorrection)
5950         colorProfile = QGuiApplicationPrivate::instance()->colorProfileForA8Text();
5951 
5952     QRgba64 srcColor = color;
5953     if (colorProfile && color.isOpaque())
5954         srcColor = colorProfile->toLinear(srcColor);
5955 
5956     if (!clip) {
5957         quint32 *dest = reinterpret_cast<quint32*>(rasterBuffer->scanLine(y)) + x;
5958         while (mapHeight--) {
5959             for (int i = 0; i < mapWidth; ++i) {
5960                 const int coverage = map[i];
5961                 alphamapblend_argb32(dest + i, coverage, srcColor, c, colorProfile);
5962             }
5963             dest += destStride;
5964             map += mapStride;
5965         }
5966     } else {
5967         int bottom = qMin(y + mapHeight, rasterBuffer->height());
5968 
5969         int top = qMax(y, 0);
5970         map += (top - y) * mapStride;
5971 
5972         const_cast<QClipData *>(clip)->initialize();
5973         for (int yp = top; yp<bottom; ++yp) {
5974             const QClipData::ClipLine &line = clip->m_clipLines[yp];
5975 
5976             quint32 *dest = reinterpret_cast<quint32 *>(rasterBuffer->scanLine(yp));
5977 
5978             for (int i=0; i<line.count; ++i) {
5979                 const QSpan &clip = line.spans[i];
5980 
5981                 int start = qMax<int>(x, clip.x);
5982                 int end = qMin<int>(x + mapWidth, clip.x + clip.len);
5983 
5984                 for (int xp=start; xp<end; ++xp) {
5985                     const int coverage = map[xp - x];
5986                     alphamapblend_argb32(dest + xp, coverage, srcColor, c, colorProfile);
5987                 } // for (i -> line.count)
5988             } // for (yp -> bottom)
5989             map += mapStride;
5990         }
5991     }
5992 }
5993 
qRgbAvg(QRgb rgb)5994 static inline int qRgbAvg(QRgb rgb)
5995 {
5996     return (qRed(rgb) * 5 + qGreen(rgb) * 6 + qBlue(rgb) * 5) / 16;
5997 }
5998 
rgbBlendPixel(quint32 * dst,int coverage,QRgba64 slinear,const QColorTrcLut * colorProfile)5999 static inline void rgbBlendPixel(quint32 *dst, int coverage, QRgba64 slinear, const QColorTrcLut *colorProfile)
6000 {
6001     // Do a gammacorrected RGB alphablend...
6002     const QRgba64 dlinear = colorProfile ? colorProfile->toLinear64(*dst) : QRgba64::fromArgb32(*dst);
6003 
6004     QRgba64 blend = rgbBlend(dlinear, slinear, coverage);
6005 
6006     *dst = colorProfile ? colorProfile->fromLinear64(blend) : toArgb32(blend);
6007 }
6008 
rgbBlend(QRgb d,QRgb s,uint rgbAlpha)6009 static inline QRgb rgbBlend(QRgb d, QRgb s, uint rgbAlpha)
6010 {
6011 #if defined(__SSE2__)
6012     __m128i vd = _mm_cvtsi32_si128(d);
6013     __m128i vs = _mm_cvtsi32_si128(s);
6014     __m128i va = _mm_cvtsi32_si128(rgbAlpha);
6015     const __m128i vz = _mm_setzero_si128();
6016     vd = _mm_unpacklo_epi8(vd, vz);
6017     vs = _mm_unpacklo_epi8(vs, vz);
6018     va = _mm_unpacklo_epi8(va, vz);
6019     __m128i vb = _mm_xor_si128(_mm_set1_epi16(255), va);
6020     vs = _mm_mullo_epi16(vs, va);
6021     vd = _mm_mullo_epi16(vd, vb);
6022     vd = _mm_add_epi16(vd, vs);
6023     vd = _mm_add_epi16(vd, _mm_srli_epi16(vd, 8));
6024     vd = _mm_add_epi16(vd, _mm_set1_epi16(0x80));
6025     vd = _mm_srli_epi16(vd, 8);
6026     vd = _mm_packus_epi16(vd, vd);
6027     return _mm_cvtsi128_si32(vd);
6028 #else
6029     const int dr = qRed(d);
6030     const int dg = qGreen(d);
6031     const int db = qBlue(d);
6032 
6033     const int sr = qRed(s);
6034     const int sg = qGreen(s);
6035     const int sb = qBlue(s);
6036 
6037     const int mr = qRed(rgbAlpha);
6038     const int mg = qGreen(rgbAlpha);
6039     const int mb = qBlue(rgbAlpha);
6040 
6041     const int nr = qt_div_255(sr * mr + dr * (255 - mr));
6042     const int ng = qt_div_255(sg * mg + dg * (255 - mg));
6043     const int nb = qt_div_255(sb * mb + db * (255 - mb));
6044 
6045     return 0xff000000 | (nr << 16) | (ng << 8) | nb;
6046 #endif
6047 }
6048 
alphargbblend_argb32(quint32 * dst,uint coverage,const QRgba64 & srcLinear,quint32 src,const QColorTrcLut * colorProfile)6049 static inline void alphargbblend_argb32(quint32 *dst, uint coverage, const QRgba64 &srcLinear, quint32 src, const QColorTrcLut *colorProfile)
6050 {
6051     if (coverage == 0xff000000) {
6052         // nothing
6053     } else if (coverage == 0xffffffff && qAlpha(src) == 255) {
6054         blend_pixel(*dst, src);
6055     } else if (*dst < 0xff000000) {
6056         // Give up and do a naive gray alphablend. Needed to deal with ARGB32 and invalid ARGB32_premultiplied, see QTBUG-60571
6057         blend_pixel(*dst, src, qRgbAvg(coverage));
6058     } else if (!colorProfile) {
6059         // First do naive blend with text-color
6060         QRgb s = *dst;
6061         blend_pixel(s, src);
6062         // Then a naive blend with glyph shape
6063         *dst = rgbBlend(*dst, s, coverage);
6064     } else if (srcLinear.isOpaque()) {
6065         rgbBlendPixel(dst, coverage, srcLinear, colorProfile);
6066     } else {
6067         // First do naive blend with text-color
6068         QRgb s = *dst;
6069         blend_pixel(s, src);
6070         // Then gamma-corrected blend with glyph shape
6071         QRgba64 s64 = colorProfile ? colorProfile->toLinear64(s) : QRgba64::fromArgb32(s);
6072         rgbBlendPixel(dst, coverage, s64, colorProfile);
6073     }
6074 }
6075 
6076 #if QT_CONFIG(raster_64bit)
rgbBlendPixel(QRgba64 & dst,int coverage,QRgba64 slinear,const QColorTrcLut * colorProfile)6077 static inline void rgbBlendPixel(QRgba64 &dst, int coverage, QRgba64 slinear, const QColorTrcLut *colorProfile)
6078 {
6079     // Do a gammacorrected RGB alphablend...
6080     const QRgba64 dlinear = colorProfile ? colorProfile->toLinear64(dst) : dst;
6081 
6082     QRgba64 blend = rgbBlend(dlinear, slinear, coverage);
6083 
6084     dst = colorProfile ? colorProfile->fromLinear(blend) : blend;
6085 }
6086 
alphargbblend_generic(uint coverage,QRgba64 * dest,int x,const QRgba64 & srcLinear,const QRgba64 & src,const QColorTrcLut * colorProfile)6087 static inline void alphargbblend_generic(uint coverage, QRgba64 *dest, int x, const QRgba64 &srcLinear, const QRgba64 &src, const QColorTrcLut *colorProfile)
6088 {
6089     if (coverage == 0xff000000) {
6090         // nothing
6091     } else if (coverage == 0xffffffff) {
6092         blend_pixel(dest[x], src);
6093     } else if (!dest[x].isOpaque()) {
6094         // Do a gray alphablend.
6095         alphamapblend_generic(qRgbAvg(coverage), dest, x, srcLinear, src, colorProfile);
6096     } else if (src.isOpaque()) {
6097         rgbBlendPixel(dest[x], coverage, srcLinear, colorProfile);
6098     } else {
6099         // First do naive blend with text-color
6100         QRgba64 s = dest[x];
6101         blend_pixel(s, src);
6102         // Then gamma-corrected blend with glyph shape
6103         if (colorProfile)
6104             s = colorProfile->toLinear(s);
6105         rgbBlendPixel(dest[x], coverage, s, colorProfile);
6106     }
6107 }
6108 
qt_alphargbblit_generic(QRasterBuffer * rasterBuffer,int x,int y,const QRgba64 & color,const uint * src,int mapWidth,int mapHeight,int srcStride,const QClipData * clip,bool useGammaCorrection)6109 static void qt_alphargbblit_generic(QRasterBuffer *rasterBuffer,
6110                                     int x, int y, const QRgba64 &color,
6111                                     const uint *src, int mapWidth, int mapHeight, int srcStride,
6112                                     const QClipData *clip, bool useGammaCorrection)
6113 {
6114     if (color.isTransparent())
6115         return;
6116 
6117     const QColorTrcLut *colorProfile = nullptr;
6118 
6119     if (useGammaCorrection)
6120         colorProfile = QGuiApplicationPrivate::instance()->colorProfileForA32Text();
6121 
6122     QRgba64 srcColor = color;
6123     if (colorProfile && color.isOpaque())
6124         srcColor = colorProfile->toLinear(srcColor);
6125 
6126     alignas(8) QRgba64 buffer[BufferSize];
6127     const DestFetchProc64 destFetch64 = destFetchProc64[rasterBuffer->format];
6128     const DestStoreProc64 destStore64 = destStoreProc64[rasterBuffer->format];
6129 
6130     if (!clip) {
6131         for (int ly = 0; ly < mapHeight; ++ly) {
6132             int i = x;
6133             int length = mapWidth;
6134             while (length > 0) {
6135                 int l = qMin(BufferSize, length);
6136                 QRgba64 *dest = destFetch64(buffer, rasterBuffer, i, y + ly, l);
6137                 for (int j=0; j < l; ++j) {
6138                     const uint coverage = src[j + (i - x)];
6139                     alphargbblend_generic(coverage, dest, j, srcColor, color, colorProfile);
6140                 }
6141                 if (destStore64)
6142                     destStore64(rasterBuffer, i, y + ly, dest, l);
6143                 length -= l;
6144                 i += l;
6145             }
6146             src += srcStride;
6147         }
6148     } else {
6149         int bottom = qMin(y + mapHeight, rasterBuffer->height());
6150 
6151         int top = qMax(y, 0);
6152         src += (top - y) * srcStride;
6153 
6154         const_cast<QClipData *>(clip)->initialize();
6155         for (int yp = top; yp<bottom; ++yp) {
6156             const QClipData::ClipLine &line = clip->m_clipLines[yp];
6157 
6158             for (int i=0; i<line.count; ++i) {
6159                 const QSpan &clip = line.spans[i];
6160 
6161                 int start = qMax<int>(x, clip.x);
6162                 int end = qMin<int>(x + mapWidth, clip.x + clip.len);
6163                 if (end <= start)
6164                     continue;
6165                 Q_ASSERT(end - start <= BufferSize);
6166                 QRgba64 *dest = destFetch64(buffer, rasterBuffer, start, clip.y, end - start);
6167 
6168                 for (int xp=start; xp<end; ++xp) {
6169                     const uint coverage = src[xp - x];
6170                     alphargbblend_generic(coverage, dest, xp - start, srcColor, color, colorProfile);
6171                 }
6172                 if (destStore64)
6173                     destStore64(rasterBuffer, start, clip.y, dest, end - start);
6174             } // for (i -> line.count)
6175             src += srcStride;
6176         } // for (yp -> bottom)
6177     }
6178 }
6179 #else
qt_alphargbblit_generic(QRasterBuffer * rasterBuffer,int x,int y,const QRgba64 & color,const uint * src,int mapWidth,int mapHeight,int srcStride,const QClipData * clip,bool useGammaCorrection)6180 static void qt_alphargbblit_generic(QRasterBuffer *rasterBuffer,
6181                                     int x, int y, const QRgba64 &color,
6182                                     const uint *src, int mapWidth, int mapHeight, int srcStride,
6183                                     const QClipData *clip, bool useGammaCorrection)
6184 {
6185     if (color.isTransparent())
6186         return;
6187 
6188     const quint32 c = color.toArgb32();
6189 
6190     const QColorTrcLut *colorProfile = nullptr;
6191 
6192     if (useGammaCorrection)
6193         colorProfile = QGuiApplicationPrivate::instance()->colorProfileForA32Text();
6194 
6195     QRgba64 srcColor = color;
6196     if (colorProfile && color.isOpaque())
6197         srcColor = colorProfile->toLinear(srcColor);
6198 
6199     quint32 buffer[BufferSize];
6200     const DestFetchProc destFetch = destFetchProc[rasterBuffer->format];
6201     const DestStoreProc destStore = destStoreProc[rasterBuffer->format];
6202 
6203     if (!clip) {
6204         for (int ly = 0; ly < mapHeight; ++ly) {
6205             int i = x;
6206             int length = mapWidth;
6207             while (length > 0) {
6208                 int l = qMin(BufferSize, length);
6209                 quint32 *dest = destFetch(buffer, rasterBuffer, i, y + ly, l);
6210                 for (int j=0; j < l; ++j) {
6211                     const uint coverage = src[j + (i - x)];
6212                     alphargbblend_argb32(dest + j, coverage, srcColor, c, colorProfile);
6213                 }
6214                 if (destStore)
6215                     destStore(rasterBuffer, i, y + ly, dest, l);
6216                 length -= l;
6217                 i += l;
6218             }
6219             src += srcStride;
6220         }
6221     } else {
6222         int bottom = qMin(y + mapHeight, rasterBuffer->height());
6223 
6224         int top = qMax(y, 0);
6225         src += (top - y) * srcStride;
6226 
6227         const_cast<QClipData *>(clip)->initialize();
6228         for (int yp = top; yp<bottom; ++yp) {
6229             const QClipData::ClipLine &line = clip->m_clipLines[yp];
6230 
6231             for (int i=0; i<line.count; ++i) {
6232                 const QSpan &clip = line.spans[i];
6233 
6234                 int start = qMax<int>(x, clip.x);
6235                 int end = qMin<int>(x + mapWidth, clip.x + clip.len);
6236                 if (end <= start)
6237                     continue;
6238                 Q_ASSERT(end - start <= BufferSize);
6239                 quint32 *dest = destFetch(buffer, rasterBuffer, start, clip.y, end - start);
6240 
6241                 for (int xp=start; xp<end; ++xp) {
6242                     const uint coverage = src[xp - x];
6243                     alphargbblend_argb32(dest + xp - start, coverage, srcColor, c, colorProfile);
6244                 }
6245                 if (destStore)
6246                     destStore(rasterBuffer, start, clip.y, dest, end - start);
6247             } // for (i -> line.count)
6248             src += srcStride;
6249         } // for (yp -> bottom)
6250     }
6251 }
6252 #endif
6253 
qt_alphargbblit_argb32(QRasterBuffer * rasterBuffer,int x,int y,const QRgba64 & color,const uint * src,int mapWidth,int mapHeight,int srcStride,const QClipData * clip,bool useGammaCorrection)6254 static void qt_alphargbblit_argb32(QRasterBuffer *rasterBuffer,
6255                                    int x, int y, const QRgba64 &color,
6256                                    const uint *src, int mapWidth, int mapHeight, int srcStride,
6257                                    const QClipData *clip, bool useGammaCorrection)
6258 {
6259     if (color.isTransparent())
6260         return;
6261 
6262     const quint32 c = color.toArgb32();
6263 
6264     const QColorTrcLut *colorProfile = nullptr;
6265 
6266     if (useGammaCorrection)
6267         colorProfile = QGuiApplicationPrivate::instance()->colorProfileForA32Text();
6268 
6269     QRgba64 srcColor = color;
6270     if (colorProfile && color.isOpaque())
6271         srcColor = colorProfile->toLinear(srcColor);
6272 
6273     if (!clip) {
6274         quint32 *dst = reinterpret_cast<quint32*>(rasterBuffer->scanLine(y)) + x;
6275         const int destStride = rasterBuffer->stride<quint32>();
6276         while (mapHeight--) {
6277             for (int i = 0; i < mapWidth; ++i) {
6278                 const uint coverage = src[i];
6279                 alphargbblend_argb32(dst + i, coverage, srcColor, c, colorProfile);
6280             }
6281 
6282             dst += destStride;
6283             src += srcStride;
6284         }
6285     } else {
6286         int bottom = qMin(y + mapHeight, rasterBuffer->height());
6287 
6288         int top = qMax(y, 0);
6289         src += (top - y) * srcStride;
6290 
6291         const_cast<QClipData *>(clip)->initialize();
6292         for (int yp = top; yp<bottom; ++yp) {
6293             const QClipData::ClipLine &line = clip->m_clipLines[yp];
6294 
6295             quint32 *dst = reinterpret_cast<quint32 *>(rasterBuffer->scanLine(yp));
6296 
6297             for (int i=0; i<line.count; ++i) {
6298                 const QSpan &clip = line.spans[i];
6299 
6300                 int start = qMax<int>(x, clip.x);
6301                 int end = qMin<int>(x + mapWidth, clip.x + clip.len);
6302 
6303                 for (int xp=start; xp<end; ++xp) {
6304                     const uint coverage = src[xp - x];
6305                     alphargbblend_argb32(dst + xp, coverage, srcColor, c, colorProfile);
6306                 }
6307             } // for (i -> line.count)
6308             src += srcStride;
6309         } // for (yp -> bottom)
6310 
6311     }
6312 }
6313 
qt_rectfill_argb32(QRasterBuffer * rasterBuffer,int x,int y,int width,int height,const QRgba64 & color)6314 static void qt_rectfill_argb32(QRasterBuffer *rasterBuffer,
6315                                int x, int y, int width, int height,
6316                                const QRgba64 &color)
6317 {
6318     qt_rectfill<quint32>(reinterpret_cast<quint32 *>(rasterBuffer->buffer()),
6319                          color.toArgb32(), x, y, width, height, rasterBuffer->bytesPerLine());
6320 }
6321 
qt_rectfill_quint16(QRasterBuffer * rasterBuffer,int x,int y,int width,int height,const QRgba64 & color)6322 static void qt_rectfill_quint16(QRasterBuffer *rasterBuffer,
6323                                 int x, int y, int width, int height,
6324                                 const QRgba64 &color)
6325 {
6326     const QPixelLayout &layout = qPixelLayouts[rasterBuffer->format];
6327     quint32 c32 = color.toArgb32();
6328     quint16 c16;
6329     layout.storeFromARGB32PM(reinterpret_cast<uchar *>(&c16), &c32, 0, 1, nullptr, nullptr);
6330     qt_rectfill<quint16>(reinterpret_cast<quint16 *>(rasterBuffer->buffer()),
6331                          c16, x, y, width, height, rasterBuffer->bytesPerLine());
6332 }
6333 
qt_rectfill_quint24(QRasterBuffer * rasterBuffer,int x,int y,int width,int height,const QRgba64 & color)6334 static void qt_rectfill_quint24(QRasterBuffer *rasterBuffer,
6335                                 int x, int y, int width, int height,
6336                                 const QRgba64 &color)
6337 {
6338     const QPixelLayout &layout = qPixelLayouts[rasterBuffer->format];
6339     quint32 c32 = color.toArgb32();
6340     quint24 c24;
6341     layout.storeFromARGB32PM(reinterpret_cast<uchar *>(&c24), &c32, 0, 1, nullptr, nullptr);
6342     qt_rectfill<quint24>(reinterpret_cast<quint24 *>(rasterBuffer->buffer()),
6343                          c24, x, y, width, height, rasterBuffer->bytesPerLine());
6344 }
6345 
qt_rectfill_nonpremul_argb32(QRasterBuffer * rasterBuffer,int x,int y,int width,int height,const QRgba64 & color)6346 static void qt_rectfill_nonpremul_argb32(QRasterBuffer *rasterBuffer,
6347                                          int x, int y, int width, int height,
6348                                          const QRgba64 &color)
6349 {
6350     qt_rectfill<quint32>(reinterpret_cast<quint32 *>(rasterBuffer->buffer()),
6351                          color.unpremultiplied().toArgb32(), x, y, width, height, rasterBuffer->bytesPerLine());
6352 }
6353 
qt_rectfill_rgba(QRasterBuffer * rasterBuffer,int x,int y,int width,int height,const QRgba64 & color)6354 static void qt_rectfill_rgba(QRasterBuffer *rasterBuffer,
6355                              int x, int y, int width, int height,
6356                              const QRgba64 &color)
6357 {
6358     qt_rectfill<quint32>(reinterpret_cast<quint32 *>(rasterBuffer->buffer()),
6359                          ARGB2RGBA(color.toArgb32()), x, y, width, height, rasterBuffer->bytesPerLine());
6360 }
6361 
qt_rectfill_nonpremul_rgba(QRasterBuffer * rasterBuffer,int x,int y,int width,int height,const QRgba64 & color)6362 static void qt_rectfill_nonpremul_rgba(QRasterBuffer *rasterBuffer,
6363                                        int x, int y, int width, int height,
6364                                        const QRgba64 &color)
6365 {
6366     qt_rectfill<quint32>(reinterpret_cast<quint32 *>(rasterBuffer->buffer()),
6367                          ARGB2RGBA(color.unpremultiplied().toArgb32()), x, y, width, height, rasterBuffer->bytesPerLine());
6368 }
6369 
6370 template<QtPixelOrder PixelOrder>
qt_rectfill_rgb30(QRasterBuffer * rasterBuffer,int x,int y,int width,int height,const QRgba64 & color)6371 static void qt_rectfill_rgb30(QRasterBuffer *rasterBuffer,
6372                               int x, int y, int width, int height,
6373                               const QRgba64 &color)
6374 {
6375     qt_rectfill<quint32>(reinterpret_cast<quint32 *>(rasterBuffer->buffer()),
6376                          qConvertRgb64ToRgb30<PixelOrder>(color), x, y, width, height, rasterBuffer->bytesPerLine());
6377 }
6378 
qt_rectfill_alpha(QRasterBuffer * rasterBuffer,int x,int y,int width,int height,const QRgba64 & color)6379 static void qt_rectfill_alpha(QRasterBuffer *rasterBuffer,
6380                              int x, int y, int width, int height,
6381                              const QRgba64 &color)
6382 {
6383     qt_rectfill<quint8>(reinterpret_cast<quint8 *>(rasterBuffer->buffer()),
6384                          color.alpha() >> 8, x, y, width, height, rasterBuffer->bytesPerLine());
6385 }
6386 
qt_rectfill_gray(QRasterBuffer * rasterBuffer,int x,int y,int width,int height,const QRgba64 & color)6387 static void qt_rectfill_gray(QRasterBuffer *rasterBuffer,
6388                              int x, int y, int width, int height,
6389                              const QRgba64 &color)
6390 {
6391     qt_rectfill<quint8>(reinterpret_cast<quint8 *>(rasterBuffer->buffer()),
6392                          qGray(color.toArgb32()), x, y, width, height, rasterBuffer->bytesPerLine());
6393 }
6394 
qt_rectfill_quint64(QRasterBuffer * rasterBuffer,int x,int y,int width,int height,const QRgba64 & color)6395 static void qt_rectfill_quint64(QRasterBuffer *rasterBuffer,
6396                                 int x, int y, int width, int height,
6397                                 const QRgba64 &color)
6398 {
6399     const auto store = qStoreFromRGBA64PM[rasterBuffer->format];
6400     quint64 c64;
6401     store(reinterpret_cast<uchar *>(&c64), &color, 0, 1, nullptr, nullptr);
6402     qt_rectfill<quint64>(reinterpret_cast<quint64 *>(rasterBuffer->buffer()),
6403                          c64, x, y, width, height, rasterBuffer->bytesPerLine());
6404 }
6405 
6406 // Map table for destination image format. Contains function pointers
6407 // for blends of various types unto the destination
6408 
6409 DrawHelper qDrawHelper[QImage::NImageFormats] =
6410 {
6411     // Format_Invalid,
6412     { nullptr, nullptr, nullptr, nullptr, nullptr },
6413     // Format_Mono,
6414     {
6415         blend_color_generic,
6416         nullptr, nullptr, nullptr, nullptr
6417     },
6418     // Format_MonoLSB,
6419     {
6420         blend_color_generic,
6421         nullptr, nullptr, nullptr, nullptr
6422     },
6423     // Format_Indexed8,
6424     {
6425         blend_color_generic,
6426         nullptr, nullptr, nullptr, nullptr
6427     },
6428     // Format_RGB32,
6429     {
6430         blend_color_argb,
6431         qt_bitmapblit_argb32,
6432         qt_alphamapblit_argb32,
6433         qt_alphargbblit_argb32,
6434         qt_rectfill_argb32
6435     },
6436     // Format_ARGB32,
6437     {
6438         blend_color_generic,
6439         qt_bitmapblit_argb32,
6440         qt_alphamapblit_argb32,
6441         qt_alphargbblit_argb32,
6442         qt_rectfill_nonpremul_argb32
6443     },
6444     // Format_ARGB32_Premultiplied
6445     {
6446         blend_color_argb,
6447         qt_bitmapblit_argb32,
6448         qt_alphamapblit_argb32,
6449         qt_alphargbblit_argb32,
6450         qt_rectfill_argb32
6451     },
6452     // Format_RGB16
6453     {
6454         blend_color_rgb16,
6455         qt_bitmapblit_quint16,
6456         qt_alphamapblit_quint16,
6457         qt_alphargbblit_generic,
6458         qt_rectfill_quint16
6459     },
6460     // Format_ARGB8565_Premultiplied
6461     {
6462         blend_color_generic,
6463         nullptr,
6464         qt_alphamapblit_generic,
6465         qt_alphargbblit_generic,
6466         qt_rectfill_quint24
6467     },
6468     // Format_RGB666
6469     {
6470         blend_color_generic,
6471         nullptr,
6472         qt_alphamapblit_generic,
6473         qt_alphargbblit_generic,
6474         qt_rectfill_quint24
6475     },
6476     // Format_ARGB6666_Premultiplied
6477     {
6478         blend_color_generic,
6479         nullptr,
6480         qt_alphamapblit_generic,
6481         qt_alphargbblit_generic,
6482         qt_rectfill_quint24
6483     },
6484     // Format_RGB555
6485     {
6486         blend_color_generic,
6487         nullptr,
6488         qt_alphamapblit_generic,
6489         qt_alphargbblit_generic,
6490         qt_rectfill_quint16
6491     },
6492     // Format_ARGB8555_Premultiplied
6493     {
6494         blend_color_generic,
6495         nullptr,
6496         qt_alphamapblit_generic,
6497         qt_alphargbblit_generic,
6498         qt_rectfill_quint24
6499     },
6500     // Format_RGB888
6501     {
6502         blend_color_generic,
6503         nullptr,
6504         qt_alphamapblit_generic,
6505         qt_alphargbblit_generic,
6506         qt_rectfill_quint24
6507     },
6508     // Format_RGB444
6509     {
6510         blend_color_generic,
6511         nullptr,
6512         qt_alphamapblit_generic,
6513         qt_alphargbblit_generic,
6514         qt_rectfill_quint16
6515     },
6516     // Format_ARGB4444_Premultiplied
6517     {
6518         blend_color_generic,
6519         nullptr,
6520         qt_alphamapblit_generic,
6521         qt_alphargbblit_generic,
6522         qt_rectfill_quint16
6523     },
6524     // Format_RGBX8888
6525     {
6526         blend_color_generic,
6527         qt_bitmapblit_rgba8888,
6528         qt_alphamapblit_generic,
6529         qt_alphargbblit_generic,
6530         qt_rectfill_rgba
6531     },
6532     // Format_RGBA8888
6533     {
6534         blend_color_generic,
6535         qt_bitmapblit_rgba8888,
6536         qt_alphamapblit_generic,
6537         qt_alphargbblit_generic,
6538         qt_rectfill_nonpremul_rgba
6539     },
6540     // Format_RGB8888_Premultiplied
6541     {
6542         blend_color_generic,
6543         qt_bitmapblit_rgba8888,
6544         qt_alphamapblit_generic,
6545         qt_alphargbblit_generic,
6546         qt_rectfill_rgba
6547     },
6548     // Format_BGR30
6549     {
6550         blend_color_generic_rgb64,
6551         qt_bitmapblit_rgb30<PixelOrderBGR>,
6552         qt_alphamapblit_generic,
6553         qt_alphargbblit_generic,
6554         qt_rectfill_rgb30<PixelOrderBGR>
6555     },
6556     // Format_A2BGR30_Premultiplied
6557     {
6558         blend_color_generic_rgb64,
6559         qt_bitmapblit_rgb30<PixelOrderBGR>,
6560         qt_alphamapblit_generic,
6561         qt_alphargbblit_generic,
6562         qt_rectfill_rgb30<PixelOrderBGR>
6563     },
6564     // Format_RGB30
6565     {
6566         blend_color_generic_rgb64,
6567         qt_bitmapblit_rgb30<PixelOrderRGB>,
6568         qt_alphamapblit_generic,
6569         qt_alphargbblit_generic,
6570         qt_rectfill_rgb30<PixelOrderRGB>
6571     },
6572     // Format_A2RGB30_Premultiplied
6573     {
6574         blend_color_generic_rgb64,
6575         qt_bitmapblit_rgb30<PixelOrderRGB>,
6576         qt_alphamapblit_generic,
6577         qt_alphargbblit_generic,
6578         qt_rectfill_rgb30<PixelOrderRGB>
6579     },
6580     // Format_Alpha8
6581     {
6582         blend_color_generic,
6583         nullptr,
6584         qt_alphamapblit_generic,
6585         qt_alphargbblit_generic,
6586         qt_rectfill_alpha
6587     },
6588     // Format_Grayscale8
6589     {
6590         blend_color_generic,
6591         nullptr,
6592         qt_alphamapblit_generic,
6593         qt_alphargbblit_generic,
6594         qt_rectfill_gray
6595     },
6596     // Format_RGBX64
6597     {
6598         blend_color_generic_rgb64,
6599         nullptr,
6600         qt_alphamapblit_generic,
6601         qt_alphargbblit_generic,
6602         qt_rectfill_quint64
6603     },
6604     // Format_RGBA64
6605     {
6606         blend_color_generic_rgb64,
6607         nullptr,
6608         qt_alphamapblit_generic,
6609         qt_alphargbblit_generic,
6610         qt_rectfill_quint64
6611     },
6612     // Format_RGBA64_Premultiplied
6613     {
6614         blend_color_generic_rgb64,
6615         nullptr,
6616         qt_alphamapblit_generic,
6617         qt_alphargbblit_generic,
6618         qt_rectfill_quint64
6619     },
6620     // Format_Grayscale16
6621     {
6622         blend_color_generic_rgb64,
6623         nullptr,
6624         qt_alphamapblit_generic,
6625         qt_alphargbblit_generic,
6626         qt_rectfill_quint16
6627     },
6628     // Format_BGR888
6629     {
6630         blend_color_generic,
6631         nullptr,
6632         qt_alphamapblit_generic,
6633         qt_alphargbblit_generic,
6634         qt_rectfill_quint24
6635     },
6636 };
6637 
// Portable qt_memfill64(): only compiled when __SSE2__ is not defined. It
// forwards dest, color and count unchanged to qt_memfill_template<quint64>.
#if !defined(__SSE2__)
void qt_memfill64(quint64 *dest, quint64 color, qsizetype count)
{
    qt_memfill_template<quint64>(dest, color, count);
}
#endif
6644 
6645 #if defined(QT_COMPILER_SUPPORTS_SSSE3) && defined(Q_CC_GNU) && !defined(Q_CC_INTEL) && !defined(Q_CC_CLANG)
6646 __attribute__((optimize("no-tree-vectorize")))
6647 #endif
qt_memfill24(quint24 * dest,quint24 color,qsizetype count)6648 void qt_memfill24(quint24 *dest, quint24 color, qsizetype count)
6649 {
6650 #  ifdef QT_COMPILER_SUPPORTS_SSSE3
6651     extern void qt_memfill24_ssse3(quint24 *, quint24, qsizetype);
6652     if (qCpuHasFeature(SSSE3))
6653         return qt_memfill24_ssse3(dest, color, count);
6654 #  endif
6655 
6656     const quint32 v = color;
6657     quint24 *end = dest + count;
6658 
6659     // prolog: align dest to 32bit
6660     while ((quintptr(dest) & 0x3) && dest < end) {
6661         *dest++ = v;
6662     }
6663     if (dest >= end)
6664         return;
6665 
6666     const uint val1 = qFromBigEndian((v <<  8) | (v >> 16));
6667     const uint val2 = qFromBigEndian((v << 16) | (v >>  8));
6668     const uint val3 = qFromBigEndian((v << 24) | (v >>  0));
6669 
6670     for ( ; dest <= (end - 4); dest += 4) {
6671        quint32 *dst = reinterpret_cast<quint32 *>(dest);
6672        dst[0] = val1;
6673        dst[1] = val2;
6674        dst[2] = val3;
6675     }
6676 
6677     // less than 4px left
6678     switch (end - dest) {
6679     case 3:
6680         *dest++ = v;
6681         Q_FALLTHROUGH();
6682     case 2:
6683         *dest++ = v;
6684         Q_FALLTHROUGH();
6685     case 1:
6686         *dest++ = v;
6687     }
6688 }
6689 
qt_memfill16(quint16 * dest,quint16 value,qsizetype count)6690 void qt_memfill16(quint16 *dest, quint16 value, qsizetype count)
6691 {
6692     const int align = quintptr(dest) & 0x3;
6693     if (align) {
6694         *dest++ = value;
6695         --count;
6696     }
6697 
6698     if (count & 0x1)
6699         dest[count - 1] = value;
6700 
6701     const quint32 value32 = (value << 16) | value;
6702     qt_memfill32(reinterpret_cast<quint32*>(dest), value32, count / 2);
6703 }
6704 
// Plain C++ definition of qt_memfill32, compiled only when none of the SSE2,
// NEON or MIPS DSP macros is defined.
#if !defined(__SSE2__) && !defined(__ARM_NEON__) && !defined(__MIPS_DSP__)
void qt_memfill32(quint32 *dest, quint32 color, qsizetype count)
{
    // Forwards unchanged to the generic helper (declared elsewhere);
    // presumably it stores `color` into `count` consecutive elements — confirm there.
    qt_memfill_template<quint32>(dest, color, count);
}
#endif
// In SSE2 builds qt_memfill32 and qt_memfill64 are function pointers rather
// than functions. They start out null and are assigned in
// qInitDrawhelperFunctions() (the SSE2 versions unless __AVX2__ is defined,
// the AVX2 versions when the ArchHaswell feature check succeeds).
#ifdef __SSE2__
decltype(qt_memfill32_sse2) *qt_memfill32 = nullptr;
decltype(qt_memfill64_sse2) *qt_memfill64 = nullptr;
#endif
6715 
// Forward declaration of the SSE4.1 store routine template; it is defined
// outside this part of the file and is installed into qPixelLayouts for the
// A2BGR30/A2RGB30 premultiplied formats by qInitDrawhelperFunctions().
#ifdef QT_COMPILER_SUPPORTS_SSE4_1
template<QtPixelOrder> void QT_FASTCALL storeA2RGB30PMFromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count, const QVector<QRgb> *, QDitherInfo *);
#endif

// Defined elsewhere; called first thing by qInitDrawhelperFunctions().
extern void qInitBlendFunctions();
6721 
qInitDrawhelperFunctions()6722 static void qInitDrawhelperFunctions()
6723 {
6724     // Set up basic blend function tables.
6725     qInitBlendFunctions();
6726 
6727 #ifdef __SSE2__
6728 #  ifndef __AVX2__
6729     qt_memfill32 = qt_memfill32_sse2;
6730     qt_memfill64 = qt_memfill64_sse2;
6731 #  endif
6732     qDrawHelper[QImage::Format_RGB32].bitmapBlit = qt_bitmapblit32_sse2;
6733     qDrawHelper[QImage::Format_ARGB32].bitmapBlit = qt_bitmapblit32_sse2;
6734     qDrawHelper[QImage::Format_ARGB32_Premultiplied].bitmapBlit = qt_bitmapblit32_sse2;
6735     qDrawHelper[QImage::Format_RGB16].bitmapBlit = qt_bitmapblit16_sse2;
6736     qDrawHelper[QImage::Format_RGBX8888].bitmapBlit = qt_bitmapblit8888_sse2;
6737     qDrawHelper[QImage::Format_RGBA8888].bitmapBlit = qt_bitmapblit8888_sse2;
6738     qDrawHelper[QImage::Format_RGBA8888_Premultiplied].bitmapBlit = qt_bitmapblit8888_sse2;
6739 
6740     extern void qt_scale_image_argb32_on_argb32_sse2(uchar *destPixels, int dbpl,
6741                                                      const uchar *srcPixels, int sbpl, int srch,
6742                                                      const QRectF &targetRect,
6743                                                      const QRectF &sourceRect,
6744                                                      const QRect &clip,
6745                                                      int const_alpha);
6746     qScaleFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_scale_image_argb32_on_argb32_sse2;
6747     qScaleFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_scale_image_argb32_on_argb32_sse2;
6748     qScaleFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBA8888_Premultiplied] = qt_scale_image_argb32_on_argb32_sse2;
6749     qScaleFunctions[QImage::Format_RGBX8888][QImage::Format_RGBA8888_Premultiplied] = qt_scale_image_argb32_on_argb32_sse2;
6750 
6751     extern void qt_blend_rgb32_on_rgb32_sse2(uchar *destPixels, int dbpl,
6752                                              const uchar *srcPixels, int sbpl,
6753                                              int w, int h,
6754                                              int const_alpha);
6755     extern void qt_blend_argb32_on_argb32_sse2(uchar *destPixels, int dbpl,
6756                                                const uchar *srcPixels, int sbpl,
6757                                                int w, int h,
6758                                                int const_alpha);
6759 
6760     qBlendFunctions[QImage::Format_RGB32][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_sse2;
6761     qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_sse2;
6762     qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_sse2;
6763     qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_sse2;
6764     qBlendFunctions[QImage::Format_RGBX8888][QImage::Format_RGBX8888] = qt_blend_rgb32_on_rgb32_sse2;
6765     qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBX8888] = qt_blend_rgb32_on_rgb32_sse2;
6766     qBlendFunctions[QImage::Format_RGBX8888][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_sse2;
6767     qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_sse2;
6768 
6769     extern const uint * QT_FASTCALL qt_fetch_radial_gradient_sse2(uint *buffer, const Operator *op, const QSpanData *data,
6770                                                                   int y, int x, int length);
6771 
6772     qt_fetch_radial_gradient = qt_fetch_radial_gradient_sse2;
6773 
6774     extern void QT_FASTCALL comp_func_SourceOver_sse2(uint *destPixels, const uint *srcPixels, int length, uint const_alpha);
6775     extern void QT_FASTCALL comp_func_solid_SourceOver_sse2(uint *destPixels, int length, uint color, uint const_alpha);
6776     extern void QT_FASTCALL comp_func_Source_sse2(uint *destPixels, const uint *srcPixels, int length, uint const_alpha);
6777     extern void QT_FASTCALL comp_func_solid_Source_sse2(uint *destPixels, int length, uint color, uint const_alpha);
6778     extern void QT_FASTCALL comp_func_Plus_sse2(uint *destPixels, const uint *srcPixels, int length, uint const_alpha);
6779     qt_functionForMode_C[QPainter::CompositionMode_SourceOver] = comp_func_SourceOver_sse2;
6780     qt_functionForModeSolid_C[QPainter::CompositionMode_SourceOver] = comp_func_solid_SourceOver_sse2;
6781     qt_functionForMode_C[QPainter::CompositionMode_Source] = comp_func_Source_sse2;
6782     qt_functionForModeSolid_C[QPainter::CompositionMode_Source] = comp_func_solid_Source_sse2;
6783     qt_functionForMode_C[QPainter::CompositionMode_Plus] = comp_func_Plus_sse2;
6784 
6785 #ifdef QT_COMPILER_SUPPORTS_SSSE3
6786     if (qCpuHasFeature(SSSE3)) {
6787         extern void qt_blend_argb32_on_argb32_ssse3(uchar *destPixels, int dbpl,
6788                                                     const uchar *srcPixels, int sbpl,
6789                                                     int w, int h,
6790                                                     int const_alpha);
6791 
6792         extern const uint * QT_FASTCALL qt_fetchUntransformed_888_ssse3(uint *buffer, const Operator *, const QSpanData *data,
6793                                                                         int y, int x, int length);
6794         qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_ssse3;
6795         qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_ssse3;
6796         qBlendFunctions[QImage::Format_RGBX8888][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_ssse3;
6797         qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_ssse3;
6798         sourceFetchUntransformed[QImage::Format_RGB888] = qt_fetchUntransformed_888_ssse3;
6799         extern void QT_FASTCALL rbSwap_888_ssse3(uchar *dst, const uchar *src, int count);
6800         qPixelLayouts[QImage::Format_RGB888].rbSwap = rbSwap_888_ssse3;
6801         qPixelLayouts[QImage::Format_BGR888].rbSwap = rbSwap_888_ssse3;
6802     }
6803 #endif // SSSE3
6804 
6805 #if defined(QT_COMPILER_SUPPORTS_SSE4_1)
6806     if (qCpuHasFeature(SSE4_1)) {
6807         extern void QT_FASTCALL convertARGB32ToARGB32PM_sse4(uint *buffer, int count, const QVector<QRgb> *);
6808         extern void QT_FASTCALL convertRGBA8888ToARGB32PM_sse4(uint *buffer, int count, const QVector<QRgb> *);
6809         extern const uint *QT_FASTCALL fetchARGB32ToARGB32PM_sse4(uint *buffer, const uchar *src, int index, int count,
6810                                                                   const QVector<QRgb> *, QDitherInfo *);
6811         extern const uint *QT_FASTCALL fetchRGBA8888ToARGB32PM_sse4(uint *buffer, const uchar *src, int index, int count,
6812                                                                     const QVector<QRgb> *, QDitherInfo *);
6813         extern const QRgba64 * QT_FASTCALL convertARGB32ToRGBA64PM_sse4(QRgba64 *buffer, const uint *src, int count,
6814                                                                         const QVector<QRgb> *, QDitherInfo *);
6815         extern const QRgba64 * QT_FASTCALL convertRGBA8888ToRGBA64PM_sse4(QRgba64 *buffer, const uint *src, int count,
6816                                                                           const QVector<QRgb> *, QDitherInfo *);
6817         extern const QRgba64 *QT_FASTCALL fetchARGB32ToRGBA64PM_sse4(QRgba64 *buffer, const uchar *src, int index, int count,
6818                                                                      const QVector<QRgb> *, QDitherInfo *);
6819         extern const QRgba64 *QT_FASTCALL fetchRGBA8888ToRGBA64PM_sse4(QRgba64 *buffer, const uchar *src, int index, int count,
6820                                                                        const QVector<QRgb> *, QDitherInfo *);
6821         extern void QT_FASTCALL storeARGB32FromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count,
6822                                                                       const QVector<QRgb> *, QDitherInfo *);
6823         extern void QT_FASTCALL storeRGBA8888FromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count,
6824                                                                         const QVector<QRgb> *, QDitherInfo *);
6825         extern void QT_FASTCALL storeRGBXFromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count,
6826                                                                     const QVector<QRgb> *, QDitherInfo *);
6827         extern void QT_FASTCALL storeARGB32FromRGBA64PM_sse4(uchar *dest, const QRgba64 *src, int index, int count,
6828                                                              const QVector<QRgb> *, QDitherInfo *);
6829         extern void QT_FASTCALL storeRGBA8888FromRGBA64PM_sse4(uchar *dest, const QRgba64 *src, int index, int count,
6830                                                               const QVector<QRgb> *, QDitherInfo *);
6831         extern void QT_FASTCALL destStore64ARGB32_sse4(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length);
6832         extern void QT_FASTCALL destStore64RGBA8888_sse4(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length);
6833 #  ifndef __AVX2__
6834         qPixelLayouts[QImage::Format_ARGB32].fetchToARGB32PM = fetchARGB32ToARGB32PM_sse4;
6835         qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_sse4;
6836         qPixelLayouts[QImage::Format_RGBA8888].fetchToARGB32PM = fetchRGBA8888ToARGB32PM_sse4;
6837         qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_sse4;
6838         qPixelLayouts[QImage::Format_ARGB32].fetchToRGBA64PM = fetchARGB32ToRGBA64PM_sse4;
6839         qPixelLayouts[QImage::Format_ARGB32].convertToRGBA64PM = convertARGB32ToRGBA64PM_sse4;
6840         qPixelLayouts[QImage::Format_RGBA8888].fetchToRGBA64PM = fetchRGBA8888ToRGBA64PM_sse4;
6841         qPixelLayouts[QImage::Format_RGBA8888].convertToRGBA64PM = convertRGBA8888ToRGBA64PM_sse4;
6842         qPixelLayouts[QImage::Format_RGBX8888].fetchToRGBA64PM = fetchRGBA8888ToRGBA64PM_sse4;
6843         qPixelLayouts[QImage::Format_RGBX8888].convertToRGBA64PM = convertRGBA8888ToRGBA64PM_sse4;
6844 #  endif
6845         qPixelLayouts[QImage::Format_ARGB32].storeFromARGB32PM = storeARGB32FromARGB32PM_sse4;
6846         qPixelLayouts[QImage::Format_RGBA8888].storeFromARGB32PM = storeRGBA8888FromARGB32PM_sse4;
6847         qPixelLayouts[QImage::Format_RGBX8888].storeFromARGB32PM = storeRGBXFromARGB32PM_sse4;
6848         qPixelLayouts[QImage::Format_A2BGR30_Premultiplied].storeFromARGB32PM = storeA2RGB30PMFromARGB32PM_sse4<PixelOrderBGR>;
6849         qPixelLayouts[QImage::Format_A2RGB30_Premultiplied].storeFromARGB32PM = storeA2RGB30PMFromARGB32PM_sse4<PixelOrderRGB>;
6850         qStoreFromRGBA64PM[QImage::Format_ARGB32] = storeARGB32FromRGBA64PM_sse4;
6851         qStoreFromRGBA64PM[QImage::Format_RGBA8888] = storeRGBA8888FromRGBA64PM_sse4;
6852 #if QT_CONFIG(raster_64bit)
6853         destStoreProc64[QImage::Format_ARGB32] = destStore64ARGB32_sse4;
6854         destStoreProc64[QImage::Format_RGBA8888] = destStore64RGBA8888_sse4;
6855 #endif
6856     }
6857 #endif
6858 
6859 #if defined(QT_COMPILER_SUPPORTS_AVX2)
6860     if (qCpuHasFeature(ArchHaswell)) {
6861         qt_memfill32 = qt_memfill32_avx2;
6862         qt_memfill64 = qt_memfill64_avx2;
6863         extern void qt_blend_rgb32_on_rgb32_avx2(uchar *destPixels, int dbpl,
6864                                                  const uchar *srcPixels, int sbpl,
6865                                                  int w, int h, int const_alpha);
6866         extern void qt_blend_argb32_on_argb32_avx2(uchar *destPixels, int dbpl,
6867                                                    const uchar *srcPixels, int sbpl,
6868                                                    int w, int h, int const_alpha);
6869         qBlendFunctions[QImage::Format_RGB32][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_avx2;
6870         qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_avx2;
6871         qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_avx2;
6872         qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_avx2;
6873         qBlendFunctions[QImage::Format_RGBX8888][QImage::Format_RGBX8888] = qt_blend_rgb32_on_rgb32_avx2;
6874         qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBX8888] = qt_blend_rgb32_on_rgb32_avx2;
6875         qBlendFunctions[QImage::Format_RGBX8888][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_avx2;
6876         qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_avx2;
6877 
6878         extern void QT_FASTCALL comp_func_Source_avx2(uint *destPixels, const uint *srcPixels, int length, uint const_alpha);
6879         extern void QT_FASTCALL comp_func_SourceOver_avx2(uint *destPixels, const uint *srcPixels, int length, uint const_alpha);
6880         extern void QT_FASTCALL comp_func_solid_SourceOver_avx2(uint *destPixels, int length, uint color, uint const_alpha);
6881         qt_functionForMode_C[QPainter::CompositionMode_Source] = comp_func_Source_avx2;
6882         qt_functionForMode_C[QPainter::CompositionMode_SourceOver] = comp_func_SourceOver_avx2;
6883         qt_functionForModeSolid_C[QPainter::CompositionMode_SourceOver] = comp_func_solid_SourceOver_avx2;
6884 #if QT_CONFIG(raster_64bit)
6885         extern void QT_FASTCALL comp_func_Source_rgb64_avx2(QRgba64 *destPixels, const QRgba64 *srcPixels, int length, uint const_alpha);
6886         extern void QT_FASTCALL comp_func_SourceOver_rgb64_avx2(QRgba64 *destPixels, const QRgba64 *srcPixels, int length, uint const_alpha);
6887         extern void QT_FASTCALL comp_func_solid_SourceOver_rgb64_avx2(QRgba64 *destPixels, int length, QRgba64 color, uint const_alpha);
6888         qt_functionForMode64_C[QPainter::CompositionMode_Source] = comp_func_Source_rgb64_avx2;
6889         qt_functionForMode64_C[QPainter::CompositionMode_SourceOver] = comp_func_SourceOver_rgb64_avx2;
6890         qt_functionForModeSolid64_C[QPainter::CompositionMode_SourceOver] = comp_func_solid_SourceOver_rgb64_avx2;
6891 #endif
6892 
6893         extern void QT_FASTCALL fetchTransformedBilinearARGB32PM_simple_scale_helper_avx2(uint *b, uint *end, const QTextureData &image,
6894                                                                                           int &fx, int &fy, int fdx, int /*fdy*/);
6895         extern void QT_FASTCALL fetchTransformedBilinearARGB32PM_downscale_helper_avx2(uint *b, uint *end, const QTextureData &image,
6896                                                                                        int &fx, int &fy, int fdx, int /*fdy*/);
6897         extern void QT_FASTCALL fetchTransformedBilinearARGB32PM_fast_rotate_helper_avx2(uint *b, uint *end, const QTextureData &image,
6898                                                                                          int &fx, int &fy, int fdx, int fdy);
6899 
6900         bilinearFastTransformHelperARGB32PM[0][SimpleScaleTransform] = fetchTransformedBilinearARGB32PM_simple_scale_helper_avx2;
6901         bilinearFastTransformHelperARGB32PM[0][DownscaleTransform] = fetchTransformedBilinearARGB32PM_downscale_helper_avx2;
6902         bilinearFastTransformHelperARGB32PM[0][FastRotateTransform] = fetchTransformedBilinearARGB32PM_fast_rotate_helper_avx2;
6903 
6904         extern void QT_FASTCALL convertARGB32ToARGB32PM_avx2(uint *buffer, int count, const QVector<QRgb> *);
6905         extern void QT_FASTCALL convertRGBA8888ToARGB32PM_avx2(uint *buffer, int count, const QVector<QRgb> *);
6906         extern const uint *QT_FASTCALL fetchARGB32ToARGB32PM_avx2(uint *buffer, const uchar *src, int index, int count,
6907                                                                   const QVector<QRgb> *, QDitherInfo *);
6908         extern const uint *QT_FASTCALL fetchRGBA8888ToARGB32PM_avx2(uint *buffer, const uchar *src, int index, int count,
6909                                                                     const QVector<QRgb> *, QDitherInfo *);
6910         qPixelLayouts[QImage::Format_ARGB32].fetchToARGB32PM = fetchARGB32ToARGB32PM_avx2;
6911         qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_avx2;
6912         qPixelLayouts[QImage::Format_RGBA8888].fetchToARGB32PM = fetchRGBA8888ToARGB32PM_avx2;
6913         qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_avx2;
6914 
6915 #if QT_CONFIG(raster_64bit)
6916         extern const QRgba64 * QT_FASTCALL convertARGB32ToRGBA64PM_avx2(QRgba64 *, const uint *, int, const QVector<QRgb> *, QDitherInfo *);
6917         extern const QRgba64 * QT_FASTCALL convertRGBA8888ToRGBA64PM_avx2(QRgba64 *, const uint *, int count, const QVector<QRgb> *, QDitherInfo *);
6918         extern const QRgba64 *QT_FASTCALL fetchARGB32ToRGBA64PM_avx2(QRgba64 *, const uchar *, int, int, const QVector<QRgb> *, QDitherInfo *);
6919         extern const QRgba64 *QT_FASTCALL fetchRGBA8888ToRGBA64PM_avx2(QRgba64 *, const uchar *, int, int, const QVector<QRgb> *, QDitherInfo *);
6920         qPixelLayouts[QImage::Format_ARGB32].convertToRGBA64PM = convertARGB32ToRGBA64PM_avx2;
6921         qPixelLayouts[QImage::Format_RGBX8888].convertToRGBA64PM = convertRGBA8888ToRGBA64PM_avx2;
6922         qPixelLayouts[QImage::Format_ARGB32].fetchToRGBA64PM = fetchARGB32ToRGBA64PM_avx2;
6923         qPixelLayouts[QImage::Format_RGBX8888].fetchToRGBA64PM = fetchRGBA8888ToRGBA64PM_avx2;
6924 #endif
6925     }
6926 #endif
6927 
6928 #endif // SSE2
6929 
6930 #if defined(__ARM_NEON__)
6931     qBlendFunctions[QImage::Format_RGB32][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_neon;
6932     qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_neon;
6933     qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_neon;
6934     qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_neon;
6935 #if Q_BYTE_ORDER == Q_LITTLE_ENDIAN
6936     qBlendFunctions[QImage::Format_RGBX8888][QImage::Format_RGBX8888] = qt_blend_rgb32_on_rgb32_neon;
6937     qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBX8888] = qt_blend_rgb32_on_rgb32_neon;
6938     qBlendFunctions[QImage::Format_RGBX8888][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_neon;
6939     qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_neon;
6940 #endif
6941 
6942     qt_functionForMode_C[QPainter::CompositionMode_SourceOver] = qt_blend_argb32_on_argb32_scanline_neon;
6943     qt_functionForModeSolid_C[QPainter::CompositionMode_SourceOver] = comp_func_solid_SourceOver_neon;
6944     qt_functionForMode_C[QPainter::CompositionMode_Plus] = comp_func_Plus_neon;
6945 
6946     extern const uint * QT_FASTCALL qt_fetch_radial_gradient_neon(uint *buffer, const Operator *op, const QSpanData *data,
6947                                                                   int y, int x, int length);
6948 
6949     qt_fetch_radial_gradient = qt_fetch_radial_gradient_neon;
6950 
6951     sourceFetchUntransformed[QImage::Format_RGB888] = qt_fetchUntransformed_888_neon;
6952 
6953 #if Q_BYTE_ORDER == Q_LITTLE_ENDIAN
6954     extern void QT_FASTCALL convertARGB32ToARGB32PM_neon(uint *buffer, int count, const QVector<QRgb> *);
6955     extern void QT_FASTCALL convertRGBA8888ToARGB32PM_neon(uint *buffer, int count, const QVector<QRgb> *);
6956     extern const uint *QT_FASTCALL fetchARGB32ToARGB32PM_neon(uint *buffer, const uchar *src, int index, int count,
6957                                                               const QVector<QRgb> *, QDitherInfo *);
6958     extern const uint *QT_FASTCALL fetchRGBA8888ToARGB32PM_neon(uint *buffer, const uchar *src, int index, int count,
6959                                                                 const QVector<QRgb> *, QDitherInfo *);
6960    extern const QRgba64 * QT_FASTCALL convertARGB32ToRGBA64PM_neon(QRgba64 *buffer, const uint *src, int count,
6961                                                                    const QVector<QRgb> *, QDitherInfo *);
6962    extern const QRgba64 * QT_FASTCALL convertRGBA8888ToRGBA64PM_neon(QRgba64 *buffer, const uint *src, int count,
6963                                                                      const QVector<QRgb> *, QDitherInfo *);
6964    extern const QRgba64 *QT_FASTCALL fetchARGB32ToRGBA64PM_neon(QRgba64 *buffer, const uchar *src, int index, int count,
6965                                                                 const QVector<QRgb> *, QDitherInfo *);
6966    extern const QRgba64 *QT_FASTCALL fetchRGBA8888ToRGBA64PM_neon(QRgba64 *buffer, const uchar *src, int index, int count,
6967                                                                   const QVector<QRgb> *, QDitherInfo *);
6968     extern void QT_FASTCALL storeARGB32FromARGB32PM_neon(uchar *dest, const uint *src, int index, int count,
6969                                                          const QVector<QRgb> *, QDitherInfo *);
6970     extern void QT_FASTCALL storeRGBA8888FromARGB32PM_neon(uchar *dest, const uint *src, int index, int count,
6971                                                            const QVector<QRgb> *, QDitherInfo *);
6972     extern void QT_FASTCALL storeRGBXFromARGB32PM_neon(uchar *dest, const uint *src, int index, int count,
6973                                                        const QVector<QRgb> *, QDitherInfo *);
6974     qPixelLayouts[QImage::Format_ARGB32].fetchToARGB32PM = fetchARGB32ToARGB32PM_neon;
6975     qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_neon;
6976     qPixelLayouts[QImage::Format_ARGB32].storeFromARGB32PM = storeARGB32FromARGB32PM_neon;
6977     qPixelLayouts[QImage::Format_ARGB32].fetchToRGBA64PM = fetchARGB32ToRGBA64PM_neon;
6978     qPixelLayouts[QImage::Format_ARGB32].convertToRGBA64PM = convertARGB32ToRGBA64PM_neon;
6979     qPixelLayouts[QImage::Format_RGBA8888].fetchToARGB32PM = fetchRGBA8888ToARGB32PM_neon;
6980     qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_neon;
6981     qPixelLayouts[QImage::Format_RGBA8888].storeFromARGB32PM = storeRGBA8888FromARGB32PM_neon;
6982     qPixelLayouts[QImage::Format_RGBA8888].fetchToRGBA64PM = fetchRGBA8888ToRGBA64PM_neon;
6983     qPixelLayouts[QImage::Format_RGBA8888].convertToRGBA64PM = convertRGBA8888ToRGBA64PM_neon;
6984     qPixelLayouts[QImage::Format_RGBX8888].storeFromARGB32PM = storeRGBXFromARGB32PM_neon;
6985     qPixelLayouts[QImage::Format_RGBX8888].fetchToRGBA64PM = fetchRGBA8888ToRGBA64PM_neon;
6986     qPixelLayouts[QImage::Format_RGBX8888].convertToRGBA64PM = convertRGBA8888ToRGBA64PM_neon;
6987 #endif
6988 
6989 #if defined(ENABLE_PIXMAN_DRAWHELPERS)
6990     // The RGB16 helpers are using Arm32 assemblythat has not been ported to AArch64
6991     qBlendFunctions[QImage::Format_RGB16][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_rgb16_neon;
6992     qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB16] = qt_blend_rgb16_on_argb32_neon;
6993     qBlendFunctions[QImage::Format_RGB16][QImage::Format_RGB16] = qt_blend_rgb16_on_rgb16_neon;
6994 
6995     qScaleFunctions[QImage::Format_RGB16][QImage::Format_ARGB32_Premultiplied] = qt_scale_image_argb32_on_rgb16_neon;
6996     qScaleFunctions[QImage::Format_RGB16][QImage::Format_RGB16] = qt_scale_image_rgb16_on_rgb16_neon;
6997 
6998     qTransformFunctions[QImage::Format_RGB16][QImage::Format_ARGB32_Premultiplied] = qt_transform_image_argb32_on_rgb16_neon;
6999     qTransformFunctions[QImage::Format_RGB16][QImage::Format_RGB16] = qt_transform_image_rgb16_on_rgb16_neon;
7000 
7001     qDrawHelper[QImage::Format_RGB16].alphamapBlit = qt_alphamapblit_quint16_neon;
7002 
7003     destFetchProc[QImage::Format_RGB16] = qt_destFetchRGB16_neon;
7004     destStoreProc[QImage::Format_RGB16] = qt_destStoreRGB16_neon;
7005 
7006     qMemRotateFunctions[QPixelLayout::BPP16][0] = qt_memrotate90_16_neon;
7007     qMemRotateFunctions[QPixelLayout::BPP16][2] = qt_memrotate270_16_neon;
7008 #endif
7009 #endif // defined(__ARM_NEON__)
7010 
7011 #if defined(__MIPS_DSP__)
7012     // Composition functions are all DSP r1
7013     qt_functionForMode_C[QPainter::CompositionMode_SourceOver] = comp_func_SourceOver_asm_mips_dsp;
7014     qt_functionForMode_C[QPainter::CompositionMode_Source] = comp_func_Source_mips_dsp;
7015     qt_functionForMode_C[QPainter::CompositionMode_DestinationOver] = comp_func_DestinationOver_mips_dsp;
7016     qt_functionForMode_C[QPainter::CompositionMode_SourceIn] = comp_func_SourceIn_mips_dsp;
7017     qt_functionForMode_C[QPainter::CompositionMode_DestinationIn] = comp_func_DestinationIn_mips_dsp;
7018     qt_functionForMode_C[QPainter::CompositionMode_DestinationOut] = comp_func_DestinationOut_mips_dsp;
7019     qt_functionForMode_C[QPainter::CompositionMode_SourceAtop] = comp_func_SourceAtop_mips_dsp;
7020     qt_functionForMode_C[QPainter::CompositionMode_DestinationAtop] = comp_func_DestinationAtop_mips_dsp;
7021     qt_functionForMode_C[QPainter::CompositionMode_Xor] = comp_func_XOR_mips_dsp;
7022     qt_functionForMode_C[QPainter::CompositionMode_SourceOut] = comp_func_SourceOut_mips_dsp;
7023 
7024     qt_functionForModeSolid_C[QPainter::CompositionMode_SourceOver] = comp_func_solid_SourceOver_mips_dsp;
7025     qt_functionForModeSolid_C[QPainter::CompositionMode_DestinationOver] = comp_func_solid_DestinationOver_mips_dsp;
7026     qt_functionForModeSolid_C[QPainter::CompositionMode_SourceIn] = comp_func_solid_SourceIn_mips_dsp;
7027     qt_functionForModeSolid_C[QPainter::CompositionMode_DestinationIn] = comp_func_solid_DestinationIn_mips_dsp;
7028     qt_functionForModeSolid_C[QPainter::CompositionMode_SourceAtop] = comp_func_solid_SourceAtop_mips_dsp;
7029     qt_functionForModeSolid_C[QPainter::CompositionMode_DestinationAtop] = comp_func_solid_DestinationAtop_mips_dsp;
7030     qt_functionForModeSolid_C[QPainter::CompositionMode_Xor] = comp_func_solid_XOR_mips_dsp;
7031     qt_functionForModeSolid_C[QPainter::CompositionMode_SourceOut] = comp_func_solid_SourceOut_mips_dsp;
7032 
7033     qBlendFunctions[QImage::Format_RGB32][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_mips_dsp;
7034     qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_mips_dsp;
7035     qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_mips_dsp;
7036     qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_mips_dsp;
7037 
7038     destFetchProc[QImage::Format_ARGB32] = qt_destFetchARGB32_mips_dsp;
7039 
7040     destStoreProc[QImage::Format_ARGB32] = qt_destStoreARGB32_mips_dsp;
7041 
7042     sourceFetchUntransformed[QImage::Format_RGB888] = qt_fetchUntransformed_888_mips_dsp;
7043     sourceFetchUntransformed[QImage::Format_RGB444] = qt_fetchUntransformed_444_mips_dsp;
7044     sourceFetchUntransformed[QImage::Format_ARGB8565_Premultiplied] = qt_fetchUntransformed_argb8565_premultiplied_mips_dsp;
7045 
7046 #if defined(__MIPS_DSPR2__)
7047     qBlendFunctions[QImage::Format_RGB16][QImage::Format_RGB16] = qt_blend_rgb16_on_rgb16_mips_dspr2;
7048     sourceFetchUntransformed[QImage::Format_RGB16] = qt_fetchUntransformedRGB16_mips_dspr2;
7049 #else
7050     qBlendFunctions[QImage::Format_RGB16][QImage::Format_RGB16] = qt_blend_rgb16_on_rgb16_mips_dsp;
7051 #endif // defined(__MIPS_DSPR2__)
7052 #endif // defined(__MIPS_DSP__)
7053 }
7054 
// Ensure initialization if this object file is linked.
// qInitDrawhelperFunctions() is file-static, so this registration is what
// gets it invoked; no direct call to it appears in this part of the file.
Q_CONSTRUCTOR_FUNCTION(qInitDrawhelperFunctions);
7057 
7058 QT_END_NAMESPACE
7059