1 /****************************************************************************
2 **
3 ** Copyright (C) 2018 The Qt Company Ltd.
4 ** Copyright (C) 2018 Intel Corporation.
5 ** Contact: https://www.qt.io/licensing/
6 **
7 ** This file is part of the QtGui module of the Qt Toolkit.
8 **
9 ** $QT_BEGIN_LICENSE:LGPL$
10 ** Commercial License Usage
11 ** Licensees holding valid commercial Qt licenses may use this file in
12 ** accordance with the commercial license agreement provided with the
13 ** Software or, alternatively, in accordance with the terms contained in
14 ** a written agreement between you and The Qt Company. For licensing terms
15 ** and conditions see https://www.qt.io/terms-conditions. For further
16 ** information use the contact form at https://www.qt.io/contact-us.
17 **
18 ** GNU Lesser General Public License Usage
19 ** Alternatively, this file may be used under the terms of the GNU Lesser
20 ** General Public License version 3 as published by the Free Software
21 ** Foundation and appearing in the file LICENSE.LGPL3 included in the
22 ** packaging of this file. Please review the following information to
23 ** ensure the GNU Lesser General Public License version 3 requirements
24 ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
25 **
26 ** GNU General Public License Usage
27 ** Alternatively, this file may be used under the terms of the GNU
28 ** General Public License version 2.0 or (at your option) the GNU General
29 ** Public license version 3 or any later version approved by the KDE Free
30 ** Qt Foundation. The licenses are as published by the Free Software
31 ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
32 ** included in the packaging of this file. Please review the following
33 ** information to ensure the GNU General Public License requirements will
34 ** be met: https://www.gnu.org/licenses/gpl-2.0.html and
35 ** https://www.gnu.org/licenses/gpl-3.0.html.
36 **
37 ** $QT_END_LICENSE$
38 **
39 ****************************************************************************/
40
41 #include <qglobal.h>
42
43 #include <qstylehints.h>
44 #include <qguiapplication.h>
45 #include <qatomic.h>
46 #include <private/qcolortrclut_p.h>
47 #include <private/qdrawhelper_p.h>
48 #include <private/qpaintengine_raster_p.h>
49 #include <private/qpainter_p.h>
50 #include <private/qdrawhelper_x86_p.h>
51 #include <private/qdrawingprimitive_sse2_p.h>
52 #include <private/qdrawhelper_neon_p.h>
53 #if defined(QT_COMPILER_SUPPORTS_MIPS_DSP) || defined(QT_COMPILER_SUPPORTS_MIPS_DSPR2)
54 #include <private/qdrawhelper_mips_dsp_p.h>
55 #endif
56 #include <private/qguiapplication_p.h>
57 #include <private/qrgba64_p.h>
58 #include <qendian.h>
59 #include <qloggingcategory.h>
60 #include <qmath.h>
61
62 QT_BEGIN_NAMESPACE
63
// Defines the logging category object lcQtGuiDrawHelper; its category name is
// "qt.gui.drawhelper".
Q_LOGGING_CATEGORY(lcQtGuiDrawHelper, "qt.gui.drawhelper")

// Statement-like helper that overwrites src with BYTE_MUL(src, a).
// src appears twice in the expansion and is the target of an assignment, so it
// has to be a side-effect-free, assignable expression.
#define MASK(src, a) src = BYTE_MUL(src, a)
67
68 /*
69 constants and structures
70 */
71
// fixed_scale is 2^16 and half_point is exactly half of it (2^15).
enum {
    half_point = 0x8000,
    fixed_scale = 2 * half_point
};
76
77 template<QImage::Format> Q_DECL_CONSTEXPR uint redWidth();
78 template<QImage::Format> Q_DECL_CONSTEXPR uint redShift();
79 template<QImage::Format> Q_DECL_CONSTEXPR uint greenWidth();
80 template<QImage::Format> Q_DECL_CONSTEXPR uint greenShift();
81 template<QImage::Format> Q_DECL_CONSTEXPR uint blueWidth();
82 template<QImage::Format> Q_DECL_CONSTEXPR uint blueShift();
83 template<QImage::Format> Q_DECL_CONSTEXPR uint alphaWidth();
84 template<QImage::Format> Q_DECL_CONSTEXPR uint alphaShift();
85
redWidth()86 template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_RGB16>() { return 5; }
redWidth()87 template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_RGB444>() { return 4; }
redWidth()88 template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_RGB555>() { return 5; }
redWidth()89 template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_RGB666>() { return 6; }
redWidth()90 template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_RGB888>() { return 8; }
redWidth()91 template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_BGR888>() { return 8; }
redWidth()92 template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_ARGB4444_Premultiplied>() { return 4; }
redWidth()93 template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_ARGB8555_Premultiplied>() { return 5; }
redWidth()94 template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_ARGB8565_Premultiplied>() { return 5; }
redWidth()95 template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_ARGB6666_Premultiplied>() { return 6; }
redWidth()96 template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_RGBX8888>() { return 8; }
redWidth()97 template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_RGBA8888>() { return 8; }
redWidth()98 template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_RGBA8888_Premultiplied>() { return 8; }
99
redShift()100 template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGB16>() { return 11; }
redShift()101 template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGB444>() { return 8; }
redShift()102 template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGB555>() { return 10; }
redShift()103 template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGB666>() { return 12; }
redShift()104 template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGB888>() { return 16; }
redShift()105 template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_BGR888>() { return 0; }
redShift()106 template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_ARGB4444_Premultiplied>() { return 8; }
redShift()107 template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_ARGB8555_Premultiplied>() { return 18; }
redShift()108 template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_ARGB8565_Premultiplied>() { return 19; }
redShift()109 template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_ARGB6666_Premultiplied>() { return 12; }
110 #if Q_BYTE_ORDER == Q_BIG_ENDIAN
redShift()111 template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGBX8888>() { return 24; }
redShift()112 template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGBA8888>() { return 24; }
redShift()113 template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGBA8888_Premultiplied>() { return 24; }
114 #else
redShift()115 template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGBX8888>() { return 0; }
redShift()116 template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGBA8888>() { return 0; }
redShift()117 template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGBA8888_Premultiplied>() { return 0; }
118 #endif
greenWidth()119 template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGB16>() { return 6; }
greenWidth()120 template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGB444>() { return 4; }
greenWidth()121 template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGB555>() { return 5; }
greenWidth()122 template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGB666>() { return 6; }
greenWidth()123 template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGB888>() { return 8; }
greenWidth()124 template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_BGR888>() { return 8; }
greenWidth()125 template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_ARGB4444_Premultiplied>() { return 4; }
greenWidth()126 template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_ARGB8555_Premultiplied>() { return 5; }
greenWidth()127 template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_ARGB8565_Premultiplied>() { return 6; }
greenWidth()128 template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_ARGB6666_Premultiplied>() { return 6; }
greenWidth()129 template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGBX8888>() { return 8; }
greenWidth()130 template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGBA8888>() { return 8; }
greenWidth()131 template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGBA8888_Premultiplied>() { return 8; }
132
greenShift()133 template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGB16>() { return 5; }
greenShift()134 template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGB444>() { return 4; }
greenShift()135 template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGB555>() { return 5; }
greenShift()136 template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGB666>() { return 6; }
greenShift()137 template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGB888>() { return 8; }
greenShift()138 template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_BGR888>() { return 8; }
greenShift()139 template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_ARGB4444_Premultiplied>() { return 4; }
greenShift()140 template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_ARGB8555_Premultiplied>() { return 13; }
greenShift()141 template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_ARGB8565_Premultiplied>() { return 13; }
greenShift()142 template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_ARGB6666_Premultiplied>() { return 6; }
143 #if Q_BYTE_ORDER == Q_BIG_ENDIAN
greenShift()144 template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGBX8888>() { return 16; }
greenShift()145 template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGBA8888>() { return 16; }
greenShift()146 template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGBA8888_Premultiplied>() { return 16; }
147 #else
greenShift()148 template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGBX8888>() { return 8; }
greenShift()149 template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGBA8888>() { return 8; }
greenShift()150 template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGBA8888_Premultiplied>() { return 8; }
151 #endif
blueWidth()152 template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGB16>() { return 5; }
blueWidth()153 template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGB444>() { return 4; }
blueWidth()154 template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGB555>() { return 5; }
blueWidth()155 template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGB666>() { return 6; }
blueWidth()156 template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGB888>() { return 8; }
blueWidth()157 template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_BGR888>() { return 8; }
blueWidth()158 template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_ARGB4444_Premultiplied>() { return 4; }
blueWidth()159 template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_ARGB8555_Premultiplied>() { return 5; }
blueWidth()160 template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_ARGB8565_Premultiplied>() { return 5; }
blueWidth()161 template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_ARGB6666_Premultiplied>() { return 6; }
blueWidth()162 template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGBX8888>() { return 8; }
blueWidth()163 template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGBA8888>() { return 8; }
blueWidth()164 template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGBA8888_Premultiplied>() { return 8; }
165
blueShift()166 template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGB16>() { return 0; }
blueShift()167 template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGB444>() { return 0; }
blueShift()168 template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGB555>() { return 0; }
blueShift()169 template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGB666>() { return 0; }
blueShift()170 template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGB888>() { return 0; }
blueShift()171 template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_BGR888>() { return 16; }
blueShift()172 template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_ARGB4444_Premultiplied>() { return 0; }
blueShift()173 template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_ARGB8555_Premultiplied>() { return 8; }
blueShift()174 template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_ARGB8565_Premultiplied>() { return 8; }
blueShift()175 template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_ARGB6666_Premultiplied>() { return 0; }
176 #if Q_BYTE_ORDER == Q_BIG_ENDIAN
blueShift()177 template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGBX8888>() { return 8; }
blueShift()178 template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGBA8888>() { return 8; }
blueShift()179 template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGBA8888_Premultiplied>() { return 8; }
180 #else
blueShift()181 template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGBX8888>() { return 16; }
blueShift()182 template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGBA8888>() { return 16; }
blueShift()183 template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGBA8888_Premultiplied>() { return 16; }
184 #endif
alphaWidth()185 template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGB16>() { return 0; }
alphaWidth()186 template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGB444>() { return 0; }
alphaWidth()187 template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGB555>() { return 0; }
alphaWidth()188 template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGB666>() { return 0; }
alphaWidth()189 template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGB888>() { return 0; }
alphaWidth()190 template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_BGR888>() { return 0; }
alphaWidth()191 template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_ARGB4444_Premultiplied>() { return 4; }
alphaWidth()192 template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_ARGB8555_Premultiplied>() { return 8; }
alphaWidth()193 template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_ARGB8565_Premultiplied>() { return 8; }
alphaWidth()194 template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_ARGB6666_Premultiplied>() { return 6; }
alphaWidth()195 template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGBX8888>() { return 0; }
alphaWidth()196 template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGBA8888>() { return 8; }
alphaWidth()197 template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGBA8888_Premultiplied>() { return 8; }
198
alphaShift()199 template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGB16>() { return 0; }
alphaShift()200 template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGB444>() { return 0; }
alphaShift()201 template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGB555>() { return 0; }
alphaShift()202 template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGB666>() { return 0; }
alphaShift()203 template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGB888>() { return 0; }
alphaShift()204 template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_BGR888>() { return 0; }
alphaShift()205 template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_ARGB4444_Premultiplied>() { return 12; }
alphaShift()206 template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_ARGB8555_Premultiplied>() { return 0; }
alphaShift()207 template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_ARGB8565_Premultiplied>() { return 0; }
alphaShift()208 template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_ARGB6666_Premultiplied>() { return 18; }
209 #if Q_BYTE_ORDER == Q_BIG_ENDIAN
alphaShift()210 template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGBX8888>() { return 0; }
alphaShift()211 template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGBA8888>() { return 0; }
alphaShift()212 template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGBA8888_Premultiplied>() { return 0; }
213 #else
alphaShift()214 template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGBX8888>() { return 24; }
alphaShift()215 template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGBA8888>() { return 24; }
alphaShift()216 template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGBA8888_Premultiplied>() { return 24; }
217 #endif
218
219 template<QImage::Format> constexpr QPixelLayout::BPP bitsPerPixel();
bitsPerPixel()220 template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGB16>() { return QPixelLayout::BPP16; }
bitsPerPixel()221 template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGB444>() { return QPixelLayout::BPP16; }
bitsPerPixel()222 template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGB555>() { return QPixelLayout::BPP16; }
bitsPerPixel()223 template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGB666>() { return QPixelLayout::BPP24; }
bitsPerPixel()224 template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGB888>() { return QPixelLayout::BPP24; }
bitsPerPixel()225 template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_BGR888>() { return QPixelLayout::BPP24; }
bitsPerPixel()226 template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_ARGB4444_Premultiplied>() { return QPixelLayout::BPP16; }
bitsPerPixel()227 template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_ARGB8555_Premultiplied>() { return QPixelLayout::BPP24; }
bitsPerPixel()228 template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_ARGB8565_Premultiplied>() { return QPixelLayout::BPP24; }
bitsPerPixel()229 template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_ARGB6666_Premultiplied>() { return QPixelLayout::BPP24; }
bitsPerPixel()230 template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGBX8888>() { return QPixelLayout::BPP32; }
bitsPerPixel()231 template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGBA8888>() { return QPixelLayout::BPP32; }
bitsPerPixel()232 template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGBA8888_Premultiplied>() { return QPixelLayout::BPP32; }
233
234
// Pointer-to-function type for multi-pixel fetchers taking
// (buffer, src, index, count) and returning a const uint pointer. This is the
// signature that fetchPixelsBPP24_ssse3() is declared with in this file.
typedef const uint *(QT_FASTCALL *FetchPixelsFunc)(uint *buffer, const uchar *src, int index, int count);
236
237 template <QPixelLayout::BPP bpp> static
fetchPixel(const uchar *,int)238 uint QT_FASTCALL fetchPixel(const uchar *, int)
239 {
240 Q_UNREACHABLE();
241 return 0;
242 }
243
244 template <>
fetchPixel(const uchar * src,int index)245 inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP1LSB>(const uchar *src, int index)
246 {
247 return (src[index >> 3] >> (index & 7)) & 1;
248 }
249
250 template <>
fetchPixel(const uchar * src,int index)251 inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP1MSB>(const uchar *src, int index)
252 {
253 return (src[index >> 3] >> (~index & 7)) & 1;
254 }
255
256 template <>
fetchPixel(const uchar * src,int index)257 inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP8>(const uchar *src, int index)
258 {
259 return src[index];
260 }
261
262 template <>
fetchPixel(const uchar * src,int index)263 inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP16>(const uchar *src, int index)
264 {
265 return reinterpret_cast<const quint16 *>(src)[index];
266 }
267
268 template <>
fetchPixel(const uchar * src,int index)269 inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP24>(const uchar *src, int index)
270 {
271 return reinterpret_cast<const quint24 *>(src)[index];
272 }
273
274 template <>
fetchPixel(const uchar * src,int index)275 inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP32>(const uchar *src, int index)
276 {
277 return reinterpret_cast<const uint *>(src)[index];
278 }
279
280 template <>
fetchPixel(const uchar * src,int index)281 inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP64>(const uchar *src, int index)
282 {
283 // We have to do the conversion in fetch to fit into a 32bit uint
284 QRgba64 c = reinterpret_cast<const QRgba64 *>(src)[index];
285 return c.toArgb32();
286 }
287
288 template <QPixelLayout::BPP bpp>
fetchPixel64(const uchar * src,int index)289 static quint64 QT_FASTCALL fetchPixel64(const uchar *src, int index)
290 {
291 Q_STATIC_ASSERT(bpp != QPixelLayout::BPP64);
292 return fetchPixel<bpp>(src, index);
293 }
294
295 template <QPixelLayout::BPP width> static
296 void QT_FASTCALL storePixel(uchar *dest, int index, uint pixel);
297
298 template <>
storePixel(uchar * dest,int index,uint pixel)299 inline void QT_FASTCALL storePixel<QPixelLayout::BPP16>(uchar *dest, int index, uint pixel)
300 {
301 reinterpret_cast<quint16 *>(dest)[index] = quint16(pixel);
302 }
303
304 template <>
storePixel(uchar * dest,int index,uint pixel)305 inline void QT_FASTCALL storePixel<QPixelLayout::BPP24>(uchar *dest, int index, uint pixel)
306 {
307 reinterpret_cast<quint24 *>(dest)[index] = quint24(pixel);
308 }
309
310 typedef uint (QT_FASTCALL *FetchPixelFunc)(const uchar *src, int index);
311
312 static const FetchPixelFunc qFetchPixel[QPixelLayout::BPPCount] = {
313 nullptr, // BPPNone
314 fetchPixel<QPixelLayout::BPP1MSB>, // BPP1MSB
315 fetchPixel<QPixelLayout::BPP1LSB>, // BPP1LSB
316 fetchPixel<QPixelLayout::BPP8>, // BPP8
317 fetchPixel<QPixelLayout::BPP16>, // BPP16
318 fetchPixel<QPixelLayout::BPP24>, // BPP24
319 fetchPixel<QPixelLayout::BPP32>, // BPP32
320 fetchPixel<QPixelLayout::BPP64> // BPP64
321 };
322
323 template<QImage::Format Format>
convertPixelToRGB32(uint s)324 static Q_ALWAYS_INLINE uint convertPixelToRGB32(uint s)
325 {
326 Q_CONSTEXPR uint redMask = ((1 << redWidth<Format>()) - 1);
327 Q_CONSTEXPR uint greenMask = ((1 << greenWidth<Format>()) - 1);
328 Q_CONSTEXPR uint blueMask = ((1 << blueWidth<Format>()) - 1);
329
330 Q_CONSTEXPR uchar redLeftShift = 8 - redWidth<Format>();
331 Q_CONSTEXPR uchar greenLeftShift = 8 - greenWidth<Format>();
332 Q_CONSTEXPR uchar blueLeftShift = 8 - blueWidth<Format>();
333
334 Q_CONSTEXPR uchar redRightShift = 2 * redWidth<Format>() - 8;
335 Q_CONSTEXPR uchar greenRightShift = 2 * greenWidth<Format>() - 8;
336 Q_CONSTEXPR uchar blueRightShift = 2 * blueWidth<Format>() - 8;
337
338 uint red = (s >> redShift<Format>()) & redMask;
339 uint green = (s >> greenShift<Format>()) & greenMask;
340 uint blue = (s >> blueShift<Format>()) & blueMask;
341
342 red = ((red << redLeftShift) | (red >> redRightShift)) << 16;
343 green = ((green << greenLeftShift) | (green >> greenRightShift)) << 8;
344 blue = (blue << blueLeftShift) | (blue >> blueRightShift);
345 return 0xff000000 | red | green | blue;
346 }
347
348 template<QImage::Format Format>
convertToRGB32(uint * buffer,int count,const QVector<QRgb> *)349 static void QT_FASTCALL convertToRGB32(uint *buffer, int count, const QVector<QRgb> *)
350 {
351 for (int i = 0; i < count; ++i)
352 buffer[i] = convertPixelToRGB32<Format>(buffer[i]);
353 }
354
#if defined(__SSE2__) && !defined(__SSSE3__) && QT_COMPILER_SUPPORTS_SSSE3
// 24-bit pixel fetcher whose definition is not part of this section. It is
// declared only for builds that have SSE2 but not SSSE3 enabled while the
// compiler is still able to produce SSSE3 code; the callers in this file test
// qCpuHasFeature(SSSE3) at run time before calling it.
extern const uint * QT_FASTCALL fetchPixelsBPP24_ssse3(uint *dest, const uchar*src, int index, int count);
#endif
358
359 template<QImage::Format Format>
fetchRGBToRGB32(uint * buffer,const uchar * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)360 static const uint *QT_FASTCALL fetchRGBToRGB32(uint *buffer, const uchar *src, int index, int count,
361 const QVector<QRgb> *, QDitherInfo *)
362 {
363 constexpr QPixelLayout::BPP BPP = bitsPerPixel<Format>();
364 #if defined(__SSE2__) && !defined(__SSSE3__) && QT_COMPILER_SUPPORTS_SSSE3
365 if (BPP == QPixelLayout::BPP24 && qCpuHasFeature(SSSE3)) {
366 // With SSE2 can convertToRGB32 be vectorized, but it takes SSSE3
367 // to vectorize the deforested version below.
368 fetchPixelsBPP24_ssse3(buffer, src, index, count);
369 convertToRGB32<Format>(buffer, count, nullptr);
370 return buffer;
371 }
372 #endif
373 for (int i = 0; i < count; ++i)
374 buffer[i] = convertPixelToRGB32<Format>(fetchPixel<BPP>(src, index + i));
375 return buffer;
376 }
377
378 template<QImage::Format Format>
convertPixelToRGB64(uint s)379 static Q_ALWAYS_INLINE QRgba64 convertPixelToRGB64(uint s)
380 {
381 return QRgba64::fromArgb32(convertPixelToRGB32<Format>(s));
382 }
383
384 template<QImage::Format Format>
convertToRGB64(QRgba64 * buffer,const uint * src,int count,const QVector<QRgb> *,QDitherInfo *)385 static const QRgba64 *QT_FASTCALL convertToRGB64(QRgba64 *buffer, const uint *src, int count,
386 const QVector<QRgb> *, QDitherInfo *)
387 {
388 for (int i = 0; i < count; ++i)
389 buffer[i] = convertPixelToRGB64<Format>(src[i]);
390 return buffer;
391 }
392
393 template<QImage::Format Format>
fetchRGBToRGB64(QRgba64 * buffer,const uchar * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)394 static const QRgba64 *QT_FASTCALL fetchRGBToRGB64(QRgba64 *buffer, const uchar *src, int index, int count,
395 const QVector<QRgb> *, QDitherInfo *)
396 {
397 for (int i = 0; i < count; ++i)
398 buffer[i] = convertPixelToRGB64<Format>(fetchPixel<bitsPerPixel<Format>()>(src, index + i));
399 return buffer;
400 }
401
402 template<QImage::Format Format>
convertPixelToARGB32PM(uint s)403 static Q_ALWAYS_INLINE uint convertPixelToARGB32PM(uint s)
404 {
405 Q_CONSTEXPR uint alphaMask = ((1 << alphaWidth<Format>()) - 1);
406 Q_CONSTEXPR uint redMask = ((1 << redWidth<Format>()) - 1);
407 Q_CONSTEXPR uint greenMask = ((1 << greenWidth<Format>()) - 1);
408 Q_CONSTEXPR uint blueMask = ((1 << blueWidth<Format>()) - 1);
409
410 Q_CONSTEXPR uchar alphaLeftShift = 8 - alphaWidth<Format>();
411 Q_CONSTEXPR uchar redLeftShift = 8 - redWidth<Format>();
412 Q_CONSTEXPR uchar greenLeftShift = 8 - greenWidth<Format>();
413 Q_CONSTEXPR uchar blueLeftShift = 8 - blueWidth<Format>();
414
415 Q_CONSTEXPR uchar alphaRightShift = 2 * alphaWidth<Format>() - 8;
416 Q_CONSTEXPR uchar redRightShift = 2 * redWidth<Format>() - 8;
417 Q_CONSTEXPR uchar greenRightShift = 2 * greenWidth<Format>() - 8;
418 Q_CONSTEXPR uchar blueRightShift = 2 * blueWidth<Format>() - 8;
419
420 Q_CONSTEXPR bool mustMin = (alphaWidth<Format>() != redWidth<Format>()) ||
421 (alphaWidth<Format>() != greenWidth<Format>()) ||
422 (alphaWidth<Format>() != blueWidth<Format>());
423
424 uint alpha = (s >> alphaShift<Format>()) & alphaMask;
425 uint red = (s >> redShift<Format>()) & redMask;
426 uint green = (s >> greenShift<Format>()) & greenMask;
427 uint blue = (s >> blueShift<Format>()) & blueMask;
428
429 alpha = (alpha << alphaLeftShift) | (alpha >> alphaRightShift);
430 red = (red << redLeftShift) | (red >> redRightShift);
431 green = (green << greenLeftShift) | (green >> greenRightShift);
432 blue = (blue << blueLeftShift) | (blue >> blueRightShift);
433
434 if (mustMin) {
435 red = qMin(alpha, red);
436 green = qMin(alpha, green);
437 blue = qMin(alpha, blue);
438 }
439
440 return (alpha << 24) | (red << 16) | (green << 8) | blue;
441 }
442
443 template<QImage::Format Format>
convertARGBPMToARGB32PM(uint * buffer,int count,const QVector<QRgb> *)444 static void QT_FASTCALL convertARGBPMToARGB32PM(uint *buffer, int count, const QVector<QRgb> *)
445 {
446 for (int i = 0; i < count; ++i)
447 buffer[i] = convertPixelToARGB32PM<Format>(buffer[i]);
448 }
449
450 template<QImage::Format Format>
fetchARGBPMToARGB32PM(uint * buffer,const uchar * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)451 static const uint *QT_FASTCALL fetchARGBPMToARGB32PM(uint *buffer, const uchar *src, int index, int count,
452 const QVector<QRgb> *, QDitherInfo *)
453 {
454 constexpr QPixelLayout::BPP BPP = bitsPerPixel<Format>();
455 #if defined(__SSE2__) && !defined(__SSSE3__) && QT_COMPILER_SUPPORTS_SSSE3
456 if (BPP == QPixelLayout::BPP24 && qCpuHasFeature(SSSE3)) {
457 // With SSE2 can convertToRGB32 be vectorized, but it takes SSSE3
458 // to vectorize the deforested version below.
459 fetchPixelsBPP24_ssse3(buffer, src, index, count);
460 convertARGBPMToARGB32PM<Format>(buffer, count, nullptr);
461 return buffer;
462 }
463 #endif
464 for (int i = 0; i < count; ++i)
465 buffer[i] = convertPixelToARGB32PM<Format>(fetchPixel<BPP>(src, index + i));
466 return buffer;
467 }
468
469 template<QImage::Format Format>
convertPixelToRGBA64PM(uint s)470 static Q_ALWAYS_INLINE QRgba64 convertPixelToRGBA64PM(uint s)
471 {
472 return QRgba64::fromArgb32(convertPixelToARGB32PM<Format>(s));
473 }
474
475 template<QImage::Format Format>
convertARGBPMToRGBA64PM(QRgba64 * buffer,const uint * src,int count,const QVector<QRgb> *,QDitherInfo *)476 static const QRgba64 *QT_FASTCALL convertARGBPMToRGBA64PM(QRgba64 *buffer, const uint *src, int count,
477 const QVector<QRgb> *, QDitherInfo *)
478 {
479 for (int i = 0; i < count; ++i)
480 buffer[i] = convertPixelToRGB64<Format>(src[i]);
481 return buffer;
482 }
483
484 template<QImage::Format Format>
fetchARGBPMToRGBA64PM(QRgba64 * buffer,const uchar * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)485 static const QRgba64 *QT_FASTCALL fetchARGBPMToRGBA64PM(QRgba64 *buffer, const uchar *src, int index, int count,
486 const QVector<QRgb> *, QDitherInfo *)
487 {
488 constexpr QPixelLayout::BPP bpp = bitsPerPixel<Format>();
489 for (int i = 0; i < count; ++i)
490 buffer[i] = convertPixelToRGBA64PM<Format>(fetchPixel<bpp>(src, index + i));
491 return buffer;
492 }
493
494 template<QImage::Format Format, bool fromRGB>
storeRGBFromARGB32PM(uchar * dest,const uint * src,int index,int count,const QVector<QRgb> *,QDitherInfo * dither)495 static void QT_FASTCALL storeRGBFromARGB32PM(uchar *dest, const uint *src, int index, int count,
496 const QVector<QRgb> *, QDitherInfo *dither)
497 {
498 Q_CONSTEXPR uchar rWidth = redWidth<Format>();
499 Q_CONSTEXPR uchar gWidth = greenWidth<Format>();
500 Q_CONSTEXPR uchar bWidth = blueWidth<Format>();
501 constexpr QPixelLayout::BPP BPP = bitsPerPixel<Format>();
502
503 // RGB32 -> RGB888 is not a precision loss.
504 if (!dither || (rWidth == 8 && gWidth == 8 && bWidth == 8)) {
505 Q_CONSTEXPR uint rMask = (1 << redWidth<Format>()) - 1;
506 Q_CONSTEXPR uint gMask = (1 << greenWidth<Format>()) - 1;
507 Q_CONSTEXPR uint bMask = (1 << blueWidth<Format>()) - 1;
508 Q_CONSTEXPR uchar rRightShift = 24 - redWidth<Format>();
509 Q_CONSTEXPR uchar gRightShift = 16 - greenWidth<Format>();
510 Q_CONSTEXPR uchar bRightShift = 8 - blueWidth<Format>();
511
512 for (int i = 0; i < count; ++i) {
513 const uint c = fromRGB ? src[i] : qUnpremultiply(src[i]);
514 const uint r = ((c >> rRightShift) & rMask) << redShift<Format>();
515 const uint g = ((c >> gRightShift) & gMask) << greenShift<Format>();
516 const uint b = ((c >> bRightShift) & bMask) << blueShift<Format>();
517 storePixel<BPP>(dest, index + i, r | g | b);
518 };
519 } else {
520 // We do ordered dither by using a rounding conversion, but instead of
521 // adding half of input precision, we add the adjusted result from the
522 // bayer matrix before narrowing.
523 // Note: Rounding conversion in itself is different from the naive
524 // conversion we do above for non-dithering.
525 const uint *bayer_line = qt_bayer_matrix[dither->y & 15];
526 for (int i = 0; i < count; ++i) {
527 const uint c = fromRGB ? src[i] : qUnpremultiply(src[i]);
528 const int d = bayer_line[(dither->x + i) & 15];
529 const int dr = d - ((d + 1) >> rWidth);
530 const int dg = d - ((d + 1) >> gWidth);
531 const int db = d - ((d + 1) >> bWidth);
532 int r = qRed(c);
533 int g = qGreen(c);
534 int b = qBlue(c);
535 r = (r + ((dr - r) >> rWidth) + 1) >> (8 - rWidth);
536 g = (g + ((dg - g) >> gWidth) + 1) >> (8 - gWidth);
537 b = (b + ((db - b) >> bWidth) + 1) >> (8 - bWidth);
538 const uint s = (r << redShift<Format>())
539 | (g << greenShift<Format>())
540 | (b << blueShift<Format>());
541 storePixel<BPP>(dest, index + i, s);
542 }
543 }
544 }
545
546 template<QImage::Format Format, bool fromRGB>
storeARGBPMFromARGB32PM(uchar * dest,const uint * src,int index,int count,const QVector<QRgb> *,QDitherInfo * dither)547 static void QT_FASTCALL storeARGBPMFromARGB32PM(uchar *dest, const uint *src, int index, int count,
548 const QVector<QRgb> *, QDitherInfo *dither)
549 {
550 constexpr QPixelLayout::BPP BPP = bitsPerPixel<Format>();
551 if (!dither) {
552 Q_CONSTEXPR uint aMask = (1 << alphaWidth<Format>()) - 1;
553 Q_CONSTEXPR uint rMask = (1 << redWidth<Format>()) - 1;
554 Q_CONSTEXPR uint gMask = (1 << greenWidth<Format>()) - 1;
555 Q_CONSTEXPR uint bMask = (1 << blueWidth<Format>()) - 1;
556
557 Q_CONSTEXPR uchar aRightShift = 32 - alphaWidth<Format>();
558 Q_CONSTEXPR uchar rRightShift = 24 - redWidth<Format>();
559 Q_CONSTEXPR uchar gRightShift = 16 - greenWidth<Format>();
560 Q_CONSTEXPR uchar bRightShift = 8 - blueWidth<Format>();
561
562 Q_CONSTEXPR uint aOpaque = aMask << alphaShift<Format>();
563 for (int i = 0; i < count; ++i) {
564 const uint c = src[i];
565 const uint a = fromRGB ? aOpaque : (((c >> aRightShift) & aMask) << alphaShift<Format>());
566 const uint r = ((c >> rRightShift) & rMask) << redShift<Format>();
567 const uint g = ((c >> gRightShift) & gMask) << greenShift<Format>();
568 const uint b = ((c >> bRightShift) & bMask) << blueShift<Format>();
569 storePixel<BPP>(dest, index + i, a | r | g | b);
570 };
571 } else {
572 Q_CONSTEXPR uchar aWidth = alphaWidth<Format>();
573 Q_CONSTEXPR uchar rWidth = redWidth<Format>();
574 Q_CONSTEXPR uchar gWidth = greenWidth<Format>();
575 Q_CONSTEXPR uchar bWidth = blueWidth<Format>();
576
577 const uint *bayer_line = qt_bayer_matrix[dither->y & 15];
578 for (int i = 0; i < count; ++i) {
579 const uint c = src[i];
580 const int d = bayer_line[(dither->x + i) & 15];
581 const int da = d - ((d + 1) >> aWidth);
582 const int dr = d - ((d + 1) >> rWidth);
583 const int dg = d - ((d + 1) >> gWidth);
584 const int db = d - ((d + 1) >> bWidth);
585 int a = qAlpha(c);
586 int r = qRed(c);
587 int g = qGreen(c);
588 int b = qBlue(c);
589 if (fromRGB)
590 a = (1 << aWidth) - 1;
591 else
592 a = (a + ((da - a) >> aWidth) + 1) >> (8 - aWidth);
593 r = (r + ((dr - r) >> rWidth) + 1) >> (8 - rWidth);
594 g = (g + ((dg - g) >> gWidth) + 1) >> (8 - gWidth);
595 b = (b + ((db - b) >> bWidth) + 1) >> (8 - bWidth);
596 uint s = (a << alphaShift<Format>())
597 | (r << redShift<Format>())
598 | (g << greenShift<Format>())
599 | (b << blueShift<Format>());
600 storePixel<BPP>(dest, index + i, s);
601 }
602 }
603 }
604
605 template<QImage::Format Format>
rbSwap(uchar * dst,const uchar * src,int count)606 static void QT_FASTCALL rbSwap(uchar *dst, const uchar *src, int count)
607 {
608 Q_CONSTEXPR uchar aWidth = alphaWidth<Format>();
609 Q_CONSTEXPR uchar aShift = alphaShift<Format>();
610 Q_CONSTEXPR uchar rWidth = redWidth<Format>();
611 Q_CONSTEXPR uchar rShift = redShift<Format>();
612 Q_CONSTEXPR uchar gWidth = greenWidth<Format>();
613 Q_CONSTEXPR uchar gShift = greenShift<Format>();
614 Q_CONSTEXPR uchar bWidth = blueWidth<Format>();
615 Q_CONSTEXPR uchar bShift = blueShift<Format>();
616 #ifdef Q_COMPILER_CONSTEXPR
617 Q_STATIC_ASSERT(rWidth == bWidth);
618 #endif
619 Q_CONSTEXPR uint redBlueMask = (1 << rWidth) - 1;
620 Q_CONSTEXPR uint alphaGreenMask = (((1 << aWidth) - 1) << aShift)
621 | (((1 << gWidth) - 1) << gShift);
622 constexpr QPixelLayout::BPP bpp = bitsPerPixel<Format>();
623
624 for (int i = 0; i < count; ++i) {
625 const uint c = fetchPixel<bpp>(src, i);
626 const uint r = (c >> rShift) & redBlueMask;
627 const uint b = (c >> bShift) & redBlueMask;
628 const uint t = (c & alphaGreenMask)
629 | (r << bShift)
630 | (b << rShift);
631 storePixel<bpp>(dst, i, t);
632 }
633 }
634
rbSwap_rgb32(uchar * d,const uchar * s,int count)635 static void QT_FASTCALL rbSwap_rgb32(uchar *d, const uchar *s, int count)
636 {
637 const uint *src = reinterpret_cast<const uint *>(s);
638 uint *dest = reinterpret_cast<uint *>(d);
639 for (int i = 0; i < count; ++i) {
640 const uint c = src[i];
641 const uint ag = c & 0xff00ff00;
642 const uint rb = c & 0x00ff00ff;
643 dest[i] = ag | (rb << 16) | (rb >> 16);
644 }
645 }
646
647 #if Q_BYTE_ORDER == Q_LITTLE_ENDIAN
648 template<>
rbSwap(uchar * d,const uchar * s,int count)649 void QT_FASTCALL rbSwap<QImage::Format_RGBA8888>(uchar *d, const uchar *s, int count)
650 {
651 return rbSwap_rgb32(d, s, count);
652 }
653 #else
654 template<>
rbSwap(uchar * d,const uchar * s,int count)655 void QT_FASTCALL rbSwap<QImage::Format_RGBA8888>(uchar *d, const uchar *s, int count)
656 {
657 const uint *src = reinterpret_cast<const uint *>(s);
658 uint *dest = reinterpret_cast<uint *>(d);
659 for (int i = 0; i < count; ++i) {
660 const uint c = src[i];
661 const uint rb = c & 0xff00ff00;
662 const uint ga = c & 0x00ff00ff;
663 dest[i] = ga | (rb << 16) | (rb >> 16);
664 }
665 }
666 #endif
667
// rbSwap implementation for 30-bit RGB pixels stored in 32-bit words: applies
// qRgbSwapRgb30 to each of the \a count words read from \a s and writes the
// results to \a d. The per-pixel work and the handling of \a d and \a s
// pointing to the same memory are left to UNALIASED_CONVERSION_LOOP.
static void QT_FASTCALL rbSwap_rgb30(uchar *d, const uchar *s, int count)
{
    const uint *src = reinterpret_cast<const uint *>(s);
    uint *dest = reinterpret_cast<uint *>(d);
    UNALIASED_CONVERSION_LOOP(dest, src, count, qRgbSwapRgb30);
}
674
// Builds the QPixelLayout entry for an RGB format described by the
// bit-field templates (bitsPerPixel<>, rbSwap<>, convertToRGB32<>, ...).
// NOTE(review): the meaning of the two leading booleans comes from the
// QPixelLayout declaration, which is not visible here; presumably
// "has alpha channel" and "premultiplied" -- confirm.
template<QImage::Format Format> Q_DECL_CONSTEXPR static inline QPixelLayout pixelLayoutRGB()
{
    return QPixelLayout{
        false,
        false,
        bitsPerPixel<Format>(),
        rbSwap<Format>,
        convertToRGB32<Format>,
        convertToRGB64<Format>,
        fetchRGBToRGB32<Format>,
        fetchRGBToRGB64<Format>,
        storeRGBFromARGB32PM<Format, false>, // second argument is fromRGB: source is ARGB32PM
        storeRGBFromARGB32PM<Format, true>   // fromRGB: source is RGB32
    };
}
690
// Builds the QPixelLayout entry for an ARGB format described by the
// bit-field templates, wiring in the ARGBPM convert/fetch/store helpers.
// NOTE(review): the meaning of the two leading booleans comes from the
// QPixelLayout declaration, which is not visible here; presumably
// "has alpha channel" and "premultiplied" -- confirm.
template<QImage::Format Format> Q_DECL_CONSTEXPR static inline QPixelLayout pixelLayoutARGBPM()
{
    return QPixelLayout{
        true,
        true,
        bitsPerPixel<Format>(),
        rbSwap<Format>,
        convertARGBPMToARGB32PM<Format>,
        convertARGBPMToRGBA64PM<Format>,
        fetchARGBPMToARGB32PM<Format>,
        fetchARGBPMToRGBA64PM<Format>,
        storeARGBPMFromARGB32PM<Format, false>, // fromRGB = false: source alpha is stored
        storeARGBPMFromARGB32PM<Format, true>   // fromRGB = true: alpha field is set to all ones
    };
}
706
convertIndexedToARGB32PM(uint * buffer,int count,const QVector<QRgb> * clut)707 static void QT_FASTCALL convertIndexedToARGB32PM(uint *buffer, int count, const QVector<QRgb> *clut)
708 {
709 for (int i = 0; i < count; ++i)
710 buffer[i] = qPremultiply(clut->at(buffer[i]));
711 }
712
713 template<QPixelLayout::BPP BPP>
fetchIndexedToARGB32PM(uint * buffer,const uchar * src,int index,int count,const QVector<QRgb> * clut,QDitherInfo *)714 static const uint *QT_FASTCALL fetchIndexedToARGB32PM(uint *buffer, const uchar *src, int index, int count,
715 const QVector<QRgb> *clut, QDitherInfo *)
716 {
717 for (int i = 0; i < count; ++i) {
718 const uint s = fetchPixel<BPP>(src, index + i);
719 buffer[i] = qPremultiply(clut->at(s));
720 }
721 return buffer;
722 }
723
724 template<QPixelLayout::BPP BPP>
fetchIndexedToRGBA64PM(QRgba64 * buffer,const uchar * src,int index,int count,const QVector<QRgb> * clut,QDitherInfo *)725 static const QRgba64 *QT_FASTCALL fetchIndexedToRGBA64PM(QRgba64 *buffer, const uchar *src, int index, int count,
726 const QVector<QRgb> *clut, QDitherInfo *)
727 {
728 for (int i = 0; i < count; ++i) {
729 const uint s = fetchPixel<BPP>(src, index + i);
730 buffer[i] = QRgba64::fromArgb32(clut->at(s)).premultiplied();
731 }
732 return buffer;
733 }
734
convertIndexedToRGBA64PM(QRgba64 * buffer,const uint * src,int count,const QVector<QRgb> * clut,QDitherInfo *)735 static const QRgba64 *QT_FASTCALL convertIndexedToRGBA64PM(QRgba64 *buffer, const uint *src, int count,
736 const QVector<QRgb> *clut, QDitherInfo *)
737 {
738 for (int i = 0; i < count; ++i)
739 buffer[i] = QRgba64::fromArgb32(clut->at(src[i])).premultiplied();
740 return buffer;
741 }
742
// In-place convert function that intentionally does nothing: the buffer is
// left exactly as it is and all arguments are ignored.
static void QT_FASTCALL convertPassThrough(uint *, int, const QVector<QRgb> *)
{
}
746
// Fetch function that copies nothing: returns a pointer to pixel \a index
// inside \a src, viewed as an array of uint. The scratch buffer, the count,
// the colour table and the dither info are all ignored.
static const uint *QT_FASTCALL fetchPassThrough(uint *, const uchar *src, int index, int,
                                                const QVector<QRgb> *, QDitherInfo *)
{
    return reinterpret_cast<const uint *>(src) + index;
}
752
// 64-bit counterpart of fetchPassThrough: returns a pointer to pixel \a index
// inside \a src, viewed as an array of QRgba64, without copying anything.
// All other arguments are ignored.
static const QRgba64 *QT_FASTCALL fetchPassThrough64(QRgba64 *, const uchar *src, int index, int,
                                                     const QVector<QRgb> *, QDitherInfo *)
{
    return reinterpret_cast<const QRgba64 *>(src) + index;
}
758
storePassThrough(uchar * dest,const uint * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)759 static void QT_FASTCALL storePassThrough(uchar *dest, const uint *src, int index, int count,
760 const QVector<QRgb> *, QDitherInfo *)
761 {
762 uint *d = reinterpret_cast<uint *>(dest) + index;
763 if (d != src)
764 memcpy(d, src, count * sizeof(uint));
765 }
766
// In-place ARGB32 -> ARGB32PM conversion of \a count pixels: forwards to
// qt_convertARGB32ToARGB32PM with \a buffer as both destination and source.
static void QT_FASTCALL convertARGB32ToARGB32PM(uint *buffer, int count, const QVector<QRgb> *)
{
    qt_convertARGB32ToARGB32PM(buffer, buffer, count);
}
771
// Fetches \a count ARGB32 pixels from \a src, starting at pixel \a index, and
// converts them into \a buffer via qt_convertARGB32ToARGB32PM, whose return
// value is passed on to the caller.
static const uint *QT_FASTCALL fetchARGB32ToARGB32PM(uint *buffer, const uchar *src, int index, int count,
                                                     const QVector<QRgb> *, QDitherInfo *)
{
    return qt_convertARGB32ToARGB32PM(buffer, reinterpret_cast<const uint *>(src) + index, count);
}
777
convertRGBA8888PMToARGB32PM(uint * buffer,int count,const QVector<QRgb> *)778 static void QT_FASTCALL convertRGBA8888PMToARGB32PM(uint *buffer, int count, const QVector<QRgb> *)
779 {
780 for (int i = 0; i < count; ++i)
781 buffer[i] = RGBA2ARGB(buffer[i]);
782 }
783
// Fetches \a count pixels from \a src, starting at pixel \a index, applying
// RGBA2ARGB to each one and writing the result to \a buffer (the loop itself
// is provided by UNALIASED_CONVERSION_LOOP). Returns \a buffer.
static const uint *QT_FASTCALL fetchRGBA8888PMToARGB32PM(uint *buffer, const uchar *src, int index, int count,
                                                         const QVector<QRgb> *, QDitherInfo *)
{
    const uint *s = reinterpret_cast<const uint *>(src) + index;
    UNALIASED_CONVERSION_LOOP(buffer, s, count, RGBA2ARGB);
    return buffer;
}
791
// In-place RGBA8888 -> ARGB32PM conversion of \a count pixels: forwards to
// qt_convertRGBA8888ToARGB32PM with \a buffer as both destination and source.
static void QT_FASTCALL convertRGBA8888ToARGB32PM(uint *buffer, int count, const QVector<QRgb> *)
{
    qt_convertRGBA8888ToARGB32PM(buffer, buffer, count);
}
796
// Fetches \a count RGBA8888 pixels from \a src, starting at pixel \a index,
// and converts them into \a buffer via qt_convertRGBA8888ToARGB32PM, whose
// return value is passed on to the caller.
static const uint *QT_FASTCALL fetchRGBA8888ToARGB32PM(uint *buffer, const uchar *src, int index, int count,
                                                       const QVector<QRgb> *, QDitherInfo *)
{
    return qt_convertRGBA8888ToARGB32PM(buffer, reinterpret_cast<const uint *>(src) + index, count);
}
802
convertAlpha8ToRGB32(uint * buffer,int count,const QVector<QRgb> *)803 static void QT_FASTCALL convertAlpha8ToRGB32(uint *buffer, int count, const QVector<QRgb> *)
804 {
805 for (int i = 0; i < count; ++i)
806 buffer[i] = qRgba(0, 0, 0, buffer[i]);
807 }
808
fetchAlpha8ToRGB32(uint * buffer,const uchar * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)809 static const uint *QT_FASTCALL fetchAlpha8ToRGB32(uint *buffer, const uchar *src, int index, int count,
810 const QVector<QRgb> *, QDitherInfo *)
811 {
812 for (int i = 0; i < count; ++i)
813 buffer[i] = qRgba(0, 0, 0, src[index + i]);
814 return buffer;
815 }
816
convertAlpha8ToRGB64(QRgba64 * buffer,const uint * src,int count,const QVector<QRgb> *,QDitherInfo *)817 static const QRgba64 *QT_FASTCALL convertAlpha8ToRGB64(QRgba64 *buffer, const uint *src, int count,
818 const QVector<QRgb> *, QDitherInfo *)
819 {
820 for (int i = 0; i < count; ++i)
821 buffer[i] = QRgba64::fromRgba(0, 0, 0, src[i]);
822 return buffer;
823 }
fetchAlpha8ToRGB64(QRgba64 * buffer,const uchar * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)824 static const QRgba64 *QT_FASTCALL fetchAlpha8ToRGB64(QRgba64 *buffer, const uchar *src, int index, int count,
825 const QVector<QRgb> *, QDitherInfo *)
826 {
827 for (int i = 0; i < count; ++i)
828 buffer[i] = QRgba64::fromRgba(0, 0, 0, src[index + i]);
829 return buffer;
830 }
831
convertGrayscale8ToRGB32(uint * buffer,int count,const QVector<QRgb> *)832 static void QT_FASTCALL convertGrayscale8ToRGB32(uint *buffer, int count, const QVector<QRgb> *)
833 {
834 for (int i = 0; i < count; ++i) {
835 const uint s = buffer[i];
836 buffer[i] = qRgb(s, s, s);
837 }
838 }
839
fetchGrayscale8ToRGB32(uint * buffer,const uchar * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)840 static const uint *QT_FASTCALL fetchGrayscale8ToRGB32(uint *buffer, const uchar *src, int index, int count,
841 const QVector<QRgb> *, QDitherInfo *)
842 {
843 for (int i = 0; i < count; ++i) {
844 const uint s = src[index + i];
845 buffer[i] = qRgb(s, s, s);
846 }
847 return buffer;
848 }
849
convertGrayscale8ToRGB64(QRgba64 * buffer,const uint * src,int count,const QVector<QRgb> *,QDitherInfo *)850 static const QRgba64 *QT_FASTCALL convertGrayscale8ToRGB64(QRgba64 *buffer, const uint *src, int count,
851 const QVector<QRgb> *, QDitherInfo *)
852 {
853 for (int i = 0; i < count; ++i)
854 buffer[i] = QRgba64::fromRgba(src[i], src[i], src[i], 255);
855 return buffer;
856 }
857
fetchGrayscale8ToRGB64(QRgba64 * buffer,const uchar * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)858 static const QRgba64 *QT_FASTCALL fetchGrayscale8ToRGB64(QRgba64 *buffer, const uchar *src, int index, int count,
859 const QVector<QRgb> *, QDitherInfo *)
860 {
861 for (int i = 0; i < count; ++i) {
862 const uint s = src[index + i];
863 buffer[i] = QRgba64::fromRgba(s, s, s, 255);
864 }
865 return buffer;
866 }
867
convertGrayscale16ToRGB32(uint * buffer,int count,const QVector<QRgb> *)868 static void QT_FASTCALL convertGrayscale16ToRGB32(uint *buffer, int count, const QVector<QRgb> *)
869 {
870 for (int i = 0; i < count; ++i) {
871 const uint x = qt_div_257(buffer[i]);
872 buffer[i] = qRgb(x, x, x);
873 }
874 }
875
fetchGrayscale16ToRGB32(uint * buffer,const uchar * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)876 static const uint *QT_FASTCALL fetchGrayscale16ToRGB32(uint *buffer, const uchar *src, int index, int count,
877 const QVector<QRgb> *, QDitherInfo *)
878 {
879 const unsigned short *s = reinterpret_cast<const unsigned short *>(src) + index;
880 for (int i = 0; i < count; ++i) {
881 const uint x = qt_div_257(s[i]);
882 buffer[i] = qRgb(x, x, x);
883 }
884 return buffer;
885 }
886
convertGrayscale16ToRGBA64(QRgba64 * buffer,const uint * src,int count,const QVector<QRgb> *,QDitherInfo *)887 static const QRgba64 *QT_FASTCALL convertGrayscale16ToRGBA64(QRgba64 *buffer, const uint *src, int count,
888 const QVector<QRgb> *, QDitherInfo *)
889 {
890 const unsigned short *s = reinterpret_cast<const unsigned short *>(src);
891 for (int i = 0; i < count; ++i)
892 buffer[i] = QRgba64::fromRgba64(s[i], s[i], s[i], 65535);
893 return buffer;
894 }
895
fetchGrayscale16ToRGBA64(QRgba64 * buffer,const uchar * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)896 static const QRgba64 *QT_FASTCALL fetchGrayscale16ToRGBA64(QRgba64 *buffer, const uchar *src, int index, int count,
897 const QVector<QRgb> *, QDitherInfo *)
898 {
899 const unsigned short *s = reinterpret_cast<const unsigned short *>(src) + index;
900 for (int i = 0; i < count; ++i) {
901 buffer[i] = QRgba64::fromRgba64(s[i], s[i], s[i], 65535);
902 }
903 return buffer;
904 }
905
// Stores \a count premultiplied pixels from \a src into \a dest at pixel
// position \a index, unpremultiplying each one with qUnpremultiply. The loop
// itself is provided by UNALIASED_CONVERSION_LOOP.
static void QT_FASTCALL storeARGB32FromARGB32PM(uchar *dest, const uint *src, int index, int count,
                                                const QVector<QRgb> *, QDitherInfo *)
{
    uint *d = reinterpret_cast<uint *>(dest) + index;
    UNALIASED_CONVERSION_LOOP(d, src, count, [](uint c) { return qUnpremultiply(c); });
}
912
// Stores \a count pixels from \a src into \a dest at pixel position \a index,
// applying ARGB2RGBA to each one. No (un)premultiplication is performed. The
// loop itself is provided by UNALIASED_CONVERSION_LOOP.
static void QT_FASTCALL storeRGBA8888PMFromARGB32PM(uchar *dest, const uint *src, int index, int count,
                                                    const QVector<QRgb> *, QDitherInfo *)
{
    uint *d = reinterpret_cast<uint *>(dest) + index;
    UNALIASED_CONVERSION_LOOP(d, src, count, ARGB2RGBA);
}
919
920 #ifdef __SSE2__
// SSE2 expansion of \a count 32-bit pixels from \a src into QRgba64 values in
// \a buffer.
//   RGBA      - the scalar paths run the pixel through RGBA2ARGB before
//               QRgba64::fromArgb32; the vector path correspondingly skips the
//               channel shuffle it applies to ARGB input.
//   maskAlpha - bits 24-31 of every source pixel are forced to 0xff first.
// Does nothing when \a count is not positive.
template<bool RGBA, bool maskAlpha>
static inline void qConvertARGB32PMToRGBA64PM_sse2(QRgba64 *buffer, const uint *src, int count)
{
    if (count <= 0)
        return;

    const __m128i amask = _mm_set1_epi32(0xff000000);
    int i = 0;
    // Scalar prologue: advance until buffer is 16-byte aligned so that the
    // aligned stores in the vector loop below are valid.
    for (; ((uintptr_t)buffer & 0xf) && i < count; ++i) {
        uint s = *src++;
        if (maskAlpha)
            s = s | 0xff000000;
        if (RGBA)
            s = RGBA2ARGB(s);
        *buffer++ = QRgba64::fromArgb32(s);
    }
    // Vector loop: four source pixels in, four QRgba64 (two registers) out.
    for (; i < count-3; i += 4) {
        __m128i vs = _mm_loadu_si128((const __m128i*)src);
        if (maskAlpha)
            vs = _mm_or_si128(vs, amask);
        src += 4;
        // Interleaving the register with itself duplicates every byte, turning
        // each 8-bit channel b into the 16-bit value (b << 8) | b.
        __m128i v1 = _mm_unpacklo_epi8(vs, vs);
        __m128i v2 = _mm_unpackhi_epi8(vs, vs);
        if (!RGBA) {
            // Exchange 16-bit lanes 0 and 2 of every pixel (lanes 1 and 3 stay).
            v1 = _mm_shufflelo_epi16(v1, _MM_SHUFFLE(3, 0, 1, 2));
            v2 = _mm_shufflelo_epi16(v2, _MM_SHUFFLE(3, 0, 1, 2));
            v1 = _mm_shufflehi_epi16(v1, _MM_SHUFFLE(3, 0, 1, 2));
            v2 = _mm_shufflehi_epi16(v2, _MM_SHUFFLE(3, 0, 1, 2));
        }
        _mm_store_si128((__m128i*)(buffer), v1);
        buffer += 2;
        _mm_store_si128((__m128i*)(buffer), v2);
        buffer += 2;
    }

    // Scalar handling of the pixels left over after the vector loop.
    SIMD_EPILOGUE(i, count, 3) {
        uint s = *src++;
        if (maskAlpha)
            s = s | 0xff000000;
        if (RGBA)
            s = RGBA2ARGB(s);
        *buffer++ = QRgba64::fromArgb32(s);
    }
}
965
// SSE2 conversion of \a count QRgba64 pixels from \a buffer into 32-bit
// 2-10-10-10 pixels in \a dest, with the channel order given by \a PixelOrder.
// Pixels are processed in groups of 16; a vector fast path is used for groups
// in which every alpha is 0xffff, groups in which every alpha is 0 are written
// as zeros, and everything else goes through qConvertRgb64ToRgb30.
template<QtPixelOrder PixelOrder>
static inline void qConvertRGBA64PMToA2RGB30PM_sse2(uint *dest, const QRgba64 *buffer, int count)
{
    const __m128i gmask = _mm_set1_epi32(0x000ffc00);
    const __m128i cmask = _mm_set1_epi32(0x000003ff);
    int i = 0;
    __m128i vr, vg, vb, va;
    // Scalar prologue until buffer is 16-byte aligned (aligned loads below).
    for (; i < count && uintptr_t(buffer) & 0xF; ++i) {
        *dest++ = qConvertRgb64ToRgb30<PixelOrder>(*buffer++);
    }

    for (; i < count-15; i += 16) {
        // Repremultiplying is really expensive and hard to do in SIMD without AVX2,
        // so we try to avoid it by checking if it is needed 16 samples at a time.
        __m128i vOr = _mm_set1_epi32(0);
        __m128i vAnd = _mm_set1_epi32(0xffffffff);
        for (int j = 0; j < 16; j += 2) {
            __m128i vs = _mm_load_si128((const __m128i*)(buffer + j));
            vOr = _mm_or_si128(vOr, vs);
            vAnd = _mm_and_si128(vAnd, vs);
        }
        // 16-bit lanes 3 and 7 are the top 16 bits of the two pixels held in a
        // register; they are combined into the OR / AND of all 16 alphas.
        const quint16 orAlpha = ((uint)_mm_extract_epi16(vOr, 3)) | ((uint)_mm_extract_epi16(vOr, 7));
        const quint16 andAlpha = ((uint)_mm_extract_epi16(vAnd, 3)) & ((uint)_mm_extract_epi16(vAnd, 7));

        if (andAlpha == 0xffff) {
            // Every alpha is 0xffff: convert two pixels per iteration in SIMD.
            for (int j = 0; j < 16; j += 2) {
                __m128i vs = _mm_load_si128((const __m128i*)buffer);
                buffer += 2;
                // Move the top 10 bits of each channel into place. Only the low
                // 32 bits of every 64-bit pixel are kept by the shuffle below,
                // so whatever ends up in the upper halves is irrelevant.
                vr = _mm_srli_epi64(vs, 6);
                vg = _mm_srli_epi64(vs, 16 + 6 - 10);
                vb = _mm_srli_epi64(vs, 32 + 6);
                vr = _mm_and_si128(vr, cmask);
                vg = _mm_and_si128(vg, gmask);
                vb = _mm_and_si128(vb, cmask);
                va = _mm_srli_epi64(vs, 48 + 14);
                // Whichever of red/blue goes to bits 20-29 depends on the order.
                if (PixelOrder == PixelOrderRGB)
                    vr = _mm_slli_epi32(vr, 20);
                else
                    vb = _mm_slli_epi32(vb, 20);
                va = _mm_slli_epi32(va, 30);
                __m128i vd = _mm_or_si128(_mm_or_si128(vr, vg), _mm_or_si128(vb, va));
                // Gather 32-bit lanes 0 and 2 (one per pixel) into the low half.
                vd = _mm_shuffle_epi32(vd, _MM_SHUFFLE(3, 1, 2, 0));
                _mm_storel_epi64((__m128i*)dest, vd);
                dest += 2;
            }
        } else if (orAlpha == 0) {
            // Every alpha is zero: the whole group is written as zeros.
            for (int j = 0; j < 16; ++j) {
                *dest++ = 0;
                buffer++;
            }
        } else {
            // Mixed alpha values: use the scalar conversion.
            for (int j = 0; j < 16; ++j)
                *dest++ = qConvertRgb64ToRgb30<PixelOrder>(*buffer++);
        }
    }

    // Scalar handling of the pixels left over after the 16-pixel groups.
    SIMD_EPILOGUE(i, count, 15)
        *dest++ = qConvertRgb64ToRgb30<PixelOrder>(*buffer++);
}
1025 #elif defined(__ARM_NEON__)
// NEON expansion of \a count 32-bit pixels from \a src into QRgba64 values in
// \a buffer.
//   RGBA      - the scalar path runs the pixel through RGBA2ARGB before
//               QRgba64::fromArgb32; the vector path correspondingly skips the
//               byte shuffle it applies to ARGB input.
//   maskAlpha - bits 24-31 of every source pixel are forced to 0xff first.
// Does nothing when \a count is not positive.
template<bool RGBA, bool maskAlpha>
static inline void qConvertARGB32PMToRGBA64PM_neon(QRgba64 *buffer, const uint *src, int count)
{
    if (count <= 0)
        return;

    const uint32x4_t amask = vdupq_n_u32(0xff000000);
    // Table-lookup indices that exchange bytes 0 and 2 of every 4-byte pixel.
#if defined(Q_PROCESSOR_ARM_64)
    const uint8x16_t rgbaMask = { 2, 1, 0, 3, 6, 5, 4, 7, 10, 9, 8, 11, 14, 13, 12, 15};
#else
    const uint8x8_t rgbaMask = { 2, 1, 0, 3, 6, 5, 4, 7 };
#endif
    int i = 0;
    // Vector loop: four source pixels in, four QRgba64 (two registers) out.
    for (; i < count-3; i += 4) {
        uint32x4_t vs32 = vld1q_u32(src);
        src += 4;
        if (maskAlpha)
            vs32 = vorrq_u32(vs32, amask);
        uint8x16_t vs8 = vreinterpretq_u8_u32(vs32);
        if (!RGBA) {
#if defined(Q_PROCESSOR_ARM_64)
            vs8 = vqtbl1q_u8(vs8, rgbaMask);
#else
            // no vqtbl1q_u8
            const uint8x8_t vlo = vtbl1_u8(vget_low_u8(vs8), rgbaMask);
            const uint8x8_t vhi = vtbl1_u8(vget_high_u8(vs8), rgbaMask);
            vs8 = vcombine_u8(vlo, vhi);
#endif
        }
        // Zipping the register with itself duplicates every byte, turning each
        // 8-bit channel b into the 16-bit value (b << 8) | b.
        uint8x16x2_t v = vzipq_u8(vs8, vs8);

        vst1q_u16((uint16_t *)buffer, vreinterpretq_u16_u8(v.val[0]));
        buffer += 2;
        vst1q_u16((uint16_t *)buffer, vreinterpretq_u16_u8(v.val[1]));
        buffer += 2;
    }

    // Scalar handling of the pixels left over after the vector loop.
    SIMD_EPILOGUE(i, count, 3) {
        uint s = *src++;
        if (maskAlpha)
            s = s | 0xff000000;
        if (RGBA)
            s = RGBA2ARGB(s);
        *buffer++ = QRgba64::fromArgb32(s);
    }
}
1072 #endif
1073
convertRGB32ToRGB64(QRgba64 * buffer,const uint * src,int count,const QVector<QRgb> *,QDitherInfo *)1074 static const QRgba64 *QT_FASTCALL convertRGB32ToRGB64(QRgba64 *buffer, const uint *src, int count,
1075 const QVector<QRgb> *, QDitherInfo *)
1076 {
1077 #ifdef __SSE2__
1078 qConvertARGB32PMToRGBA64PM_sse2<false, true>(buffer, src, count);
1079 #elif defined(__ARM_NEON__)
1080 qConvertARGB32PMToRGBA64PM_neon<false, true>(buffer, src, count);
1081 #else
1082 for (int i = 0; i < count; ++i)
1083 buffer[i] = QRgba64::fromArgb32(0xff000000 | src[i]);
1084 #endif
1085 return buffer;
1086 }
1087
// Fetches \a count RGB32 pixels from \a src, starting at pixel \a index, and
// converts them into \a buffer by forwarding to convertRGB32ToRGB64.
static const QRgba64 *QT_FASTCALL fetchRGB32ToRGB64(QRgba64 *buffer, const uchar *src, int index, int count,
                                                    const QVector<QRgb> *, QDitherInfo *)
{
    return convertRGB32ToRGB64(buffer, reinterpret_cast<const uint *>(src) + index, count, nullptr, nullptr);
}
1093
convertARGB32ToRGBA64PM(QRgba64 * buffer,const uint * src,int count,const QVector<QRgb> *,QDitherInfo *)1094 static const QRgba64 *QT_FASTCALL convertARGB32ToRGBA64PM(QRgba64 *buffer, const uint *src, int count,
1095 const QVector<QRgb> *, QDitherInfo *)
1096 {
1097 for (int i = 0; i < count; ++i)
1098 buffer[i] = QRgba64::fromArgb32(src[i]).premultiplied();
1099 return buffer;
1100 }
1101
// Fetches \a count ARGB32 pixels from \a src, starting at pixel \a index, and
// converts them into \a buffer by forwarding to convertARGB32ToRGBA64PM.
static const QRgba64 *QT_FASTCALL fetchARGB32ToRGBA64PM(QRgba64 *buffer, const uchar *src, int index, int count,
                                                        const QVector<QRgb> *, QDitherInfo *)
{
    return convertARGB32ToRGBA64PM(buffer, reinterpret_cast<const uint *>(src) + index, count, nullptr, nullptr);
}
1107
convertARGB32PMToRGBA64PM(QRgba64 * buffer,const uint * src,int count,const QVector<QRgb> *,QDitherInfo *)1108 static const QRgba64 *QT_FASTCALL convertARGB32PMToRGBA64PM(QRgba64 *buffer, const uint *src, int count,
1109 const QVector<QRgb> *, QDitherInfo *)
1110 {
1111 #ifdef __SSE2__
1112 qConvertARGB32PMToRGBA64PM_sse2<false, false>(buffer, src, count);
1113 #elif defined(__ARM_NEON__)
1114 qConvertARGB32PMToRGBA64PM_neon<false, false>(buffer, src, count);
1115 #else
1116 for (int i = 0; i < count; ++i)
1117 buffer[i] = QRgba64::fromArgb32(src[i]);
1118 #endif
1119 return buffer;
1120 }
1121
// Fetches \a count ARGB32PM pixels from \a src, starting at pixel \a index,
// and converts them into \a buffer by forwarding to convertARGB32PMToRGBA64PM.
static const QRgba64 *QT_FASTCALL fetchARGB32PMToRGBA64PM(QRgba64 *buffer, const uchar *src, int index, int count,
                                                          const QVector<QRgb> *, QDitherInfo *)
{
    return convertARGB32PMToRGBA64PM(buffer, reinterpret_cast<const uint *>(src) + index, count, nullptr, nullptr);
}
1127
1128 #if QT_CONFIG(raster_64bit)
convertRGBA64ToRGBA64PM(QRgba64 * buffer,int count)1129 static void convertRGBA64ToRGBA64PM(QRgba64 *buffer, int count)
1130 {
1131 for (int i = 0; i < count; ++i)
1132 buffer[i] = buffer[i].premultiplied();
1133 }
1134
// Counterpart of convertRGBA64ToRGBA64PM that intentionally does nothing: the
// buffer is left untouched and both arguments are ignored.
static void convertRGBA64PMToRGBA64PM(QRgba64 *, int)
{
}
1138 #endif
1139
fetchRGBA64ToRGBA64PM(QRgba64 * buffer,const uchar * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)1140 static const QRgba64 *QT_FASTCALL fetchRGBA64ToRGBA64PM(QRgba64 *buffer, const uchar *src, int index, int count,
1141 const QVector<QRgb> *, QDitherInfo *)
1142 {
1143 const QRgba64 *s = reinterpret_cast<const QRgba64 *>(src) + index;
1144 for (int i = 0; i < count; ++i)
1145 buffer[i] = QRgba64::fromRgba64(s[i]).premultiplied();
1146 return buffer;
1147 }
1148
convertRGBA8888ToRGBA64PM(QRgba64 * buffer,const uint * src,int count,const QVector<QRgb> *,QDitherInfo *)1149 static const QRgba64 *QT_FASTCALL convertRGBA8888ToRGBA64PM(QRgba64 *buffer, const uint *src, int count,
1150 const QVector<QRgb> *, QDitherInfo *)
1151 {
1152 for (int i = 0; i < count; ++i)
1153 buffer[i] = QRgba64::fromArgb32(RGBA2ARGB(src[i])).premultiplied();
1154 return buffer;
1155 }
1156
// Fetches \a count RGBA8888 pixels from \a src, starting at pixel \a index,
// and converts them into \a buffer by forwarding to convertRGBA8888ToRGBA64PM.
static const QRgba64 *QT_FASTCALL fetchRGBA8888ToRGBA64PM(QRgba64 *buffer, const uchar *src, int index, int count,
                                                          const QVector<QRgb> *, QDitherInfo *)
{
    return convertRGBA8888ToRGBA64PM(buffer, reinterpret_cast<const uint *>(src) + index, count, nullptr, nullptr);
}
1162
convertRGBA8888PMToRGBA64PM(QRgba64 * buffer,const uint * src,int count,const QVector<QRgb> *,QDitherInfo *)1163 static const QRgba64 *QT_FASTCALL convertRGBA8888PMToRGBA64PM(QRgba64 *buffer, const uint *src, int count,
1164 const QVector<QRgb> *, QDitherInfo *)
1165 {
1166 #ifdef __SSE2__
1167 qConvertARGB32PMToRGBA64PM_sse2<true, false>(buffer, src, count);
1168 #elif defined(__ARM_NEON__)
1169 qConvertARGB32PMToRGBA64PM_neon<true, false>(buffer, src, count);
1170 #else
1171 for (int i = 0; i < count; ++i)
1172 buffer[i] = QRgba64::fromArgb32(RGBA2ARGB(src[i]));
1173 #endif
1174 return buffer;
1175 }
1176
// Fetches \a count RGBA8888PM pixels from \a src, starting at pixel \a index,
// and converts them into \a buffer by forwarding to
// convertRGBA8888PMToRGBA64PM.
static const QRgba64 *QT_FASTCALL fetchRGBA8888PMToRGBA64PM(QRgba64 *buffer, const uchar *src, int index, int count,
                                                            const QVector<QRgb> *, QDitherInfo *)
{
    return convertRGBA8888PMToRGBA64PM(buffer, reinterpret_cast<const uint *>(src) + index, count, nullptr, nullptr);
}
1182
// Stores \a count premultiplied pixels from \a src into \a dest at pixel
// position \a index: each pixel is unpremultiplied with qUnpremultiply and
// then passed through ARGB2RGBA. The loop itself is provided by
// UNALIASED_CONVERSION_LOOP.
static void QT_FASTCALL storeRGBA8888FromARGB32PM(uchar *dest, const uint *src, int index, int count,
                                                  const QVector<QRgb> *, QDitherInfo *)
{
    uint *d = reinterpret_cast<uint *>(dest) + index;
    UNALIASED_CONVERSION_LOOP(d, src, count, [](uint c) { return ARGB2RGBA(qUnpremultiply(c)); });
}
1189
// Stores \a count RGB32 pixels from \a src into \a dest at pixel position
// \a index: bits 24-31 of each pixel are forced to 0xff and the result is
// passed through ARGB2RGBA. The loop itself is provided by
// UNALIASED_CONVERSION_LOOP.
static void QT_FASTCALL storeRGBXFromRGB32(uchar *dest, const uint *src, int index, int count,
                                           const QVector<QRgb> *, QDitherInfo *)
{
    uint *d = reinterpret_cast<uint *>(dest) + index;
    UNALIASED_CONVERSION_LOOP(d, src, count, [](uint c) { return ARGB2RGBA(0xff000000 | c); });
}
1196
// Stores \a count premultiplied pixels from \a src into \a dest at pixel
// position \a index: each pixel is unpremultiplied with qUnpremultiply, bits
// 24-31 are forced to 0xff and the result is passed through ARGB2RGBA. The
// loop itself is provided by UNALIASED_CONVERSION_LOOP.
static void QT_FASTCALL storeRGBXFromARGB32PM(uchar *dest, const uint *src, int index, int count,
                                              const QVector<QRgb> *, QDitherInfo *)
{
    uint *d = reinterpret_cast<uint *>(dest) + index;
    UNALIASED_CONVERSION_LOOP(d, src, count, [](uint c) { return ARGB2RGBA(0xff000000 | qUnpremultiply(c)); });
}
1203
1204 template<QtPixelOrder PixelOrder>
convertA2RGB30PMToARGB32PM(uint * buffer,int count,const QVector<QRgb> *)1205 static void QT_FASTCALL convertA2RGB30PMToARGB32PM(uint *buffer, int count, const QVector<QRgb> *)
1206 {
1207 for (int i = 0; i < count; ++i)
1208 buffer[i] = qConvertA2rgb30ToArgb32<PixelOrder>(buffer[i]);
1209 }
1210
1211 template<QtPixelOrder PixelOrder>
fetchA2RGB30PMToARGB32PM(uint * buffer,const uchar * s,int index,int count,const QVector<QRgb> *,QDitherInfo * dither)1212 static const uint *QT_FASTCALL fetchA2RGB30PMToARGB32PM(uint *buffer, const uchar *s, int index, int count,
1213 const QVector<QRgb> *, QDitherInfo *dither)
1214 {
1215 const uint *src = reinterpret_cast<const uint *>(s) + index;
1216 if (!dither) {
1217 UNALIASED_CONVERSION_LOOP(buffer, src, count, qConvertA2rgb30ToArgb32<PixelOrder>);
1218 } else {
1219 for (int i = 0; i < count; ++i) {
1220 const uint c = src[i];
1221 short d10 = (qt_bayer_matrix[dither->y & 15][(dither->x + i) & 15] << 2);
1222 short a10 = (c >> 30) * 0x155;
1223 short r10 = ((c >> 20) & 0x3ff);
1224 short g10 = ((c >> 10) & 0x3ff);
1225 short b10 = (c & 0x3ff);
1226 if (PixelOrder == PixelOrderBGR)
1227 std::swap(r10, b10);
1228 short a8 = (a10 + ((d10 - a10) >> 8)) >> 2;
1229 short r8 = (r10 + ((d10 - r10) >> 8)) >> 2;
1230 short g8 = (g10 + ((d10 - g10) >> 8)) >> 2;
1231 short b8 = (b10 + ((d10 - b10) >> 8)) >> 2;
1232 buffer[i] = qRgba(r8, g8, b8, a8);
1233 }
1234 }
1235 return buffer;
1236 }
1237
1238 #ifdef __SSE2__
// SSE2 expansion of \a count 2-10-10-10 pixels from \a src into QRgba64 values
// in \a buffer, with the source channel order given by \a PixelOrder.
// Does nothing when \a count is not positive.
template<QtPixelOrder PixelOrder>
static inline void qConvertA2RGB30PMToRGBA64PM_sse2(QRgba64 *buffer, const uint *src, int count)
{
    if (count <= 0)
        return;

    // Masks selecting the three 10-bit fields at bits 20-29, 10-19 and 0-9.
    const __m128i rmask = _mm_set1_epi32(0x3ff00000);
    const __m128i gmask = _mm_set1_epi32(0x000ffc00);
    const __m128i bmask = _mm_set1_epi32(0x000003ff);
    // Multiplier that expands the 2-bit alpha to 16 bits (3 * 0x5555 == 0xffff).
    const __m128i afactor = _mm_set1_epi16(0x5555);
    int i = 0;

    // Scalar prologue until buffer is 16-byte aligned (aligned stores below).
    for (; ((uintptr_t)buffer & 0xf) && i < count; ++i)
        *buffer++ = qConvertA2rgb30ToRgb64<PixelOrder>(*src++);

    // Vector loop: four source pixels in, four QRgba64 (two registers) out.
    for (; i < count-3; i += 4) {
        __m128i vs = _mm_loadu_si128((const __m128i*)src);
        src += 4;
        __m128i va = _mm_srli_epi32(vs, 30);
        __m128i vr = _mm_and_si128(vs, rmask);
        __m128i vb = _mm_and_si128(vs, bmask);
        __m128i vg = _mm_and_si128(vs, gmask);
        va = _mm_mullo_epi16(va, afactor);
        // Expand each 10-bit field to 16 bits in the low half of its 32-bit
        // lane: the field is placed at bits 6-15 and its top six bits are
        // replicated into bits 0-5.
        vr = _mm_or_si128(_mm_srli_epi32(vr, 14), _mm_srli_epi32(vr, 24));
        vg = _mm_or_si128(_mm_srli_epi32(vg, 4), _mm_srli_epi32(vg, 14));
        vb = _mm_or_si128(_mm_slli_epi32(vb, 6), _mm_srli_epi32(vb, 4));
        // The two-byte shift moves a 16-bit value into the upper half of its
        // 32-bit lane, pairing two channels per lane; which of the red/blue
        // fields goes first depends on the pixel order.
        __m128i vrb;
        if (PixelOrder == PixelOrderRGB)
            vrb = _mm_or_si128(vr, _mm_slli_si128(vb, 2));
        else
            vrb = _mm_or_si128(vb, _mm_slli_si128(vr, 2));
        __m128i vga = _mm_or_si128(vg, _mm_slli_si128(va, 2));
        // Interleave the 16-bit lanes of the two pairs into four channels per pixel.
        _mm_store_si128((__m128i*)(buffer), _mm_unpacklo_epi16(vrb, vga));
        buffer += 2;
        _mm_store_si128((__m128i*)(buffer), _mm_unpackhi_epi16(vrb, vga));
        buffer += 2;
    }

    // Scalar handling of the pixels left over after the vector loop.
    SIMD_EPILOGUE(i, count, 3)
        *buffer++ = qConvertA2rgb30ToRgb64<PixelOrder>(*src++);
}
1280 #endif
1281
1282 template<QtPixelOrder PixelOrder>
convertA2RGB30PMToRGBA64PM(QRgba64 * buffer,const uint * src,int count,const QVector<QRgb> *,QDitherInfo *)1283 static const QRgba64 *QT_FASTCALL convertA2RGB30PMToRGBA64PM(QRgba64 *buffer, const uint *src, int count,
1284 const QVector<QRgb> *, QDitherInfo *)
1285 {
1286 #ifdef __SSE2__
1287 qConvertA2RGB30PMToRGBA64PM_sse2<PixelOrder>(buffer, src, count);
1288 #else
1289 for (int i = 0; i < count; ++i)
1290 buffer[i] = qConvertA2rgb30ToRgb64<PixelOrder>(src[i]);
1291 #endif
1292 return buffer;
1293 }
1294
// Fetches \a count 2-10-10-10 pixels from \a src, starting at pixel \a index,
// and converts them into \a buffer by forwarding to
// convertA2RGB30PMToRGBA64PM for the same \a PixelOrder.
template<QtPixelOrder PixelOrder>
static const QRgba64 *QT_FASTCALL fetchA2RGB30PMToRGBA64PM(QRgba64 *buffer, const uchar *src, int index, int count,
                                                           const QVector<QRgb> *, QDitherInfo *)
{
    return convertA2RGB30PMToRGBA64PM<PixelOrder>(buffer, reinterpret_cast<const uint *>(src) + index, count, nullptr, nullptr);
}
1301
// Stores \a count pixels from \a src into \a dest at pixel position \a index,
// converting each one with qConvertArgb32ToA2rgb30 for the given
// \a PixelOrder. The loop itself is provided by UNALIASED_CONVERSION_LOOP.
template<QtPixelOrder PixelOrder>
static void QT_FASTCALL storeA2RGB30PMFromARGB32PM(uchar *dest, const uint *src, int index, int count,
                                                   const QVector<QRgb> *, QDitherInfo *)
{
    uint *d = reinterpret_cast<uint *>(dest) + index;
    UNALIASED_CONVERSION_LOOP(d, src, count, qConvertArgb32ToA2rgb30<PixelOrder>);
}
1309
1310 template<QtPixelOrder PixelOrder>
storeRGB30FromRGB32(uchar * dest,const uint * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)1311 static void QT_FASTCALL storeRGB30FromRGB32(uchar *dest, const uint *src, int index, int count,
1312 const QVector<QRgb> *, QDitherInfo *)
1313 {
1314 uint *d = reinterpret_cast<uint *>(dest) + index;
1315 UNALIASED_CONVERSION_LOOP(d, src, count, qConvertRgb32ToRgb30<PixelOrder>);
1316 }
1317
1318 template<QtPixelOrder PixelOrder>
storeRGB30FromARGB32PM(uchar * dest,const uint * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)1319 static void QT_FASTCALL storeRGB30FromARGB32PM(uchar *dest, const uint *src, int index, int count,
1320 const QVector<QRgb> *, QDitherInfo *)
1321 {
1322 uint *d = reinterpret_cast<uint *>(dest) + index;
1323 UNALIASED_CONVERSION_LOOP(d, src, count, qConvertRgb32ToRgb30<PixelOrder>);
1324 }
1325
/*
    Converts count QRgba64 pixels from src into 32-bit pixels in dst.

    In the scalar path each pixel is converted with QRgba64::toArgb32(); when
    the RGBA template argument is true the result is additionally passed
    through ARGB2RGBA(). When __SSE2__ is defined, pixels are handled two at a
    time and only the leftovers go through the scalar path.
*/
template<bool RGBA>
void qt_convertRGBA64ToARGB32(uint *dst, const QRgba64 *src, int count)
{
    int i = 0;
#ifdef __SSE2__
    // Convert a single pixel with scalar code first when dst is not 8-byte aligned.
    if (((uintptr_t)dst & 0x7) && count > 0) {
        uint s = (*src++).toArgb32();
        if (RGBA)
            s = ARGB2RGBA(s);
        *dst++ = s;
        i++;
    }
    const __m128i vhalf = _mm_set1_epi32(0x80);
    const __m128i vzero = _mm_setzero_si128();
    for (; i < count-1; i += 2) {
        // Load two source pixels (eight 16-bit channels).
        __m128i vs = _mm_loadu_si128((const __m128i*)src);
        src += 2;
        if (!RGBA) {
            // Swap 16-bit lanes 0 and 2 within each pixel (presumably red and
            // blue - confirm against QRgba64's channel layout).
            vs = _mm_shufflelo_epi16(vs, _MM_SHUFFLE(3, 0, 1, 2));
            vs = _mm_shufflehi_epi16(vs, _MM_SHUFFLE(3, 0, 1, 2));
        }
        // Widen every 16-bit channel to 32 bits (v1: first pixel, v2: second pixel).
        __m128i v1 = _mm_unpacklo_epi16(vs, vzero);
        __m128i v2 = _mm_unpackhi_epi16(vs, vzero);
        // Reduce each channel to 8 bits: t = x + 0x80; result = (t - (t >> 8)) >> 8.
        v1 = _mm_add_epi32(v1, vhalf);
        v2 = _mm_add_epi32(v2, vhalf);
        v1 = _mm_sub_epi32(v1, _mm_srli_epi32(v1, 8));
        v2 = _mm_sub_epi32(v2, _mm_srli_epi32(v2, 8));
        v1 = _mm_srli_epi32(v1, 8);
        v2 = _mm_srli_epi32(v2, 8);
        // Narrow 32 -> 16 -> 8 bits and write the low 64 bits: two 32-bit pixels.
        v1 = _mm_packs_epi32(v1, v2);
        v1 = _mm_packus_epi16(v1, vzero);
        _mm_storel_epi64((__m128i*)(dst), v1);
        dst += 2;
    }
#endif
    // Scalar path: whatever is left (everything when SSE2 is unavailable).
    for (; i < count; i++) {
        uint s = (*src++).toArgb32();
        if (RGBA)
            s = ARGB2RGBA(s);
        *dst++ = s;
    }
}
// Explicit instantiations for both values of the RGBA flag.
template void qt_convertRGBA64ToARGB32<false>(uint *dst, const QRgba64 *src, int count);
template void qt_convertRGBA64ToARGB32<true>(uint *dst, const QRgba64 *src, int count);
1370
1371
storeAlpha8FromARGB32PM(uchar * dest,const uint * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)1372 static void QT_FASTCALL storeAlpha8FromARGB32PM(uchar *dest, const uint *src, int index, int count,
1373 const QVector<QRgb> *, QDitherInfo *)
1374 {
1375 for (int i = 0; i < count; ++i)
1376 dest[index + i] = qAlpha(src[i]);
1377 }
1378
storeGrayscale8FromRGB32(uchar * dest,const uint * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)1379 static void QT_FASTCALL storeGrayscale8FromRGB32(uchar *dest, const uint *src, int index, int count,
1380 const QVector<QRgb> *, QDitherInfo *)
1381 {
1382 for (int i = 0; i < count; ++i)
1383 dest[index + i] = qGray(src[i]);
1384 }
1385
storeGrayscale8FromARGB32PM(uchar * dest,const uint * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)1386 static void QT_FASTCALL storeGrayscale8FromARGB32PM(uchar *dest, const uint *src, int index, int count,
1387 const QVector<QRgb> *, QDitherInfo *)
1388 {
1389 for (int i = 0; i < count; ++i)
1390 dest[index + i] = qGray(qUnpremultiply(src[i]));
1391 }
1392
storeGrayscale16FromRGB32(uchar * dest,const uint * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)1393 static void QT_FASTCALL storeGrayscale16FromRGB32(uchar *dest, const uint *src, int index, int count,
1394 const QVector<QRgb> *, QDitherInfo *)
1395 {
1396 unsigned short *d = reinterpret_cast<unsigned short *>(dest) + index;
1397 for (int i = 0; i < count; ++i)
1398 d[i] = qGray(src[i]) * 257;
1399 }
1400
storeGrayscale16FromARGB32PM(uchar * dest,const uint * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)1401 static void QT_FASTCALL storeGrayscale16FromARGB32PM(uchar *dest, const uint *src, int index, int count,
1402 const QVector<QRgb> *, QDitherInfo *)
1403 {
1404 unsigned short *d = reinterpret_cast<unsigned short *>(dest) + index;
1405 for (int i = 0; i < count; ++i)
1406 d[i] = qGray(qUnpremultiply(src[i])) * 257;
1407 }
1408
fetchRGB64ToRGB32(uint * buffer,const uchar * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)1409 static const uint *QT_FASTCALL fetchRGB64ToRGB32(uint *buffer, const uchar *src, int index, int count,
1410 const QVector<QRgb> *, QDitherInfo *)
1411 {
1412 const QRgba64 *s = reinterpret_cast<const QRgba64 *>(src) + index;
1413 for (int i = 0; i < count; ++i)
1414 buffer[i] = toArgb32(s[i]);
1415 return buffer;
1416 }
1417
storeRGB64FromRGB32(uchar * dest,const uint * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)1418 static void QT_FASTCALL storeRGB64FromRGB32(uchar *dest, const uint *src, int index, int count,
1419 const QVector<QRgb> *, QDitherInfo *)
1420 {
1421 QRgba64 *d = reinterpret_cast<QRgba64 *>(dest) + index;
1422 for (int i = 0; i < count; ++i)
1423 d[i] = QRgba64::fromArgb32(src[i]);
1424 }
1425
fetchRGBA64ToARGB32PM(uint * buffer,const uchar * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)1426 static const uint *QT_FASTCALL fetchRGBA64ToARGB32PM(uint *buffer, const uchar *src, int index, int count,
1427 const QVector<QRgb> *, QDitherInfo *)
1428 {
1429 const QRgba64 *s = reinterpret_cast<const QRgba64 *>(src) + index;
1430 for (int i = 0; i < count; ++i)
1431 buffer[i] = toArgb32(s[i].premultiplied());
1432 return buffer;
1433 }
1434
storeRGBA64FromARGB32PM(uchar * dest,const uint * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)1435 static void QT_FASTCALL storeRGBA64FromARGB32PM(uchar *dest, const uint *src, int index, int count,
1436 const QVector<QRgb> *, QDitherInfo *)
1437 {
1438 QRgba64 *d = reinterpret_cast<QRgba64 *>(dest) + index;
1439 for (int i = 0; i < count; ++i)
1440 d[i] = QRgba64::fromArgb32(src[i]).unpremultiplied();
1441 }
1442
// Note:
// convertToArgb32() assumes that no color channel is less than 4 bits.
// storeRGBFromARGB32PM() assumes that no color channel is more than 8 bits.
// QImage::rgbSwapped() assumes that the red and blue color channels have the same number of bits.
//
// Table of pixel layout descriptions with QImage::NImageFormats entries; the trailing
// comment on each explicit entry names the image format it describes. Each explicit
// initializer lists, in order:
//   - two boolean flags (judging by the ARGB32 / ARGB32_Premultiplied entries and the
//     member names used by destStore(): hasAlphaChannel, then premultiplied -
//     TODO confirm against the QPixelLayout declaration),
//   - the bits-per-pixel enumerator,
//   - the red/blue swap routine,
//   - the convert-to-ARGB32PM and convert-to-RGBA64PM routines,
//   - the fetch-to-ARGB32PM and fetch-to-RGBA64PM routines,
//   - the store-from-ARGB32PM and store-from-RGB32 routines.
// Entries written as pixelLayoutRGB<...>() / pixelLayoutARGBPM<...>() are produced by
// helper templates defined elsewhere in this file.
QPixelLayout qPixelLayouts[QImage::NImageFormats] = {
    { false, false, QPixelLayout::BPPNone, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr }, // Format_Invalid
    { false, false, QPixelLayout::BPP1MSB, nullptr,
      convertIndexedToARGB32PM, convertIndexedToRGBA64PM,
      fetchIndexedToARGB32PM<QPixelLayout::BPP1MSB>, fetchIndexedToRGBA64PM<QPixelLayout::BPP1MSB>,
      nullptr, nullptr }, // Format_Mono
    { false, false, QPixelLayout::BPP1LSB, nullptr,
      convertIndexedToARGB32PM, convertIndexedToRGBA64PM,
      fetchIndexedToARGB32PM<QPixelLayout::BPP1LSB>, fetchIndexedToRGBA64PM<QPixelLayout::BPP1LSB>,
      nullptr, nullptr }, // Format_MonoLSB
    { false, false, QPixelLayout::BPP8, nullptr,
      convertIndexedToARGB32PM, convertIndexedToRGBA64PM,
      fetchIndexedToARGB32PM<QPixelLayout::BPP8>, fetchIndexedToRGBA64PM<QPixelLayout::BPP8>,
      nullptr, nullptr }, // Format_Indexed8
    // Technically using convertPassThrough to convert from ARGB32PM to RGB32 is wrong,
    // but everywhere this generic conversion would be wrong is currently overloaded.
    { false, false, QPixelLayout::BPP32, rbSwap_rgb32, convertPassThrough,
      convertRGB32ToRGB64, fetchPassThrough, fetchRGB32ToRGB64, storePassThrough, storePassThrough }, // Format_RGB32
    { true, false, QPixelLayout::BPP32, rbSwap_rgb32, convertARGB32ToARGB32PM,
      convertARGB32ToRGBA64PM, fetchARGB32ToARGB32PM, fetchARGB32ToRGBA64PM, storeARGB32FromARGB32PM, storePassThrough }, // Format_ARGB32
    { true, true, QPixelLayout::BPP32, rbSwap_rgb32, convertPassThrough,
      convertARGB32PMToRGBA64PM, fetchPassThrough, fetchARGB32PMToRGBA64PM, storePassThrough, storePassThrough }, // Format_ARGB32_Premultiplied
    pixelLayoutRGB<QImage::Format_RGB16>(),
    pixelLayoutARGBPM<QImage::Format_ARGB8565_Premultiplied>(),
    pixelLayoutRGB<QImage::Format_RGB666>(),
    pixelLayoutARGBPM<QImage::Format_ARGB6666_Premultiplied>(),
    pixelLayoutRGB<QImage::Format_RGB555>(),
    pixelLayoutARGBPM<QImage::Format_ARGB8555_Premultiplied>(),
    pixelLayoutRGB<QImage::Format_RGB888>(),
    pixelLayoutRGB<QImage::Format_RGB444>(),
    pixelLayoutARGBPM<QImage::Format_ARGB4444_Premultiplied>(),
    { false, false, QPixelLayout::BPP32, rbSwap<QImage::Format_RGBA8888>, convertRGBA8888PMToARGB32PM,
      convertRGBA8888PMToRGBA64PM, fetchRGBA8888PMToARGB32PM, fetchRGBA8888PMToRGBA64PM, storeRGBXFromARGB32PM, storeRGBXFromRGB32 }, // Format_RGBX8888
    { true, false, QPixelLayout::BPP32, rbSwap<QImage::Format_RGBA8888>, convertRGBA8888ToARGB32PM,
      convertRGBA8888ToRGBA64PM, fetchRGBA8888ToARGB32PM, fetchRGBA8888ToRGBA64PM, storeRGBA8888FromARGB32PM, storeRGBXFromRGB32 }, // Format_RGBA8888
    { true, true, QPixelLayout::BPP32, rbSwap<QImage::Format_RGBA8888>, convertRGBA8888PMToARGB32PM,
      convertRGBA8888PMToRGBA64PM, fetchRGBA8888PMToARGB32PM, fetchRGBA8888PMToRGBA64PM, storeRGBA8888PMFromARGB32PM, storeRGBXFromRGB32 }, // Format_RGBA8888_Premultiplied
    { false, false, QPixelLayout::BPP32, rbSwap_rgb30,
      convertA2RGB30PMToARGB32PM<PixelOrderBGR>,
      convertA2RGB30PMToRGBA64PM<PixelOrderBGR>,
      fetchA2RGB30PMToARGB32PM<PixelOrderBGR>,
      fetchA2RGB30PMToRGBA64PM<PixelOrderBGR>,
      storeRGB30FromARGB32PM<PixelOrderBGR>,
      storeRGB30FromRGB32<PixelOrderBGR>
    }, // Format_BGR30
    { true, true, QPixelLayout::BPP32, rbSwap_rgb30,
      convertA2RGB30PMToARGB32PM<PixelOrderBGR>,
      convertA2RGB30PMToRGBA64PM<PixelOrderBGR>,
      fetchA2RGB30PMToARGB32PM<PixelOrderBGR>,
      fetchA2RGB30PMToRGBA64PM<PixelOrderBGR>,
      storeA2RGB30PMFromARGB32PM<PixelOrderBGR>,
      storeRGB30FromRGB32<PixelOrderBGR>
    }, // Format_A2BGR30_Premultiplied
    { false, false, QPixelLayout::BPP32, rbSwap_rgb30,
      convertA2RGB30PMToARGB32PM<PixelOrderRGB>,
      convertA2RGB30PMToRGBA64PM<PixelOrderRGB>,
      fetchA2RGB30PMToARGB32PM<PixelOrderRGB>,
      fetchA2RGB30PMToRGBA64PM<PixelOrderRGB>,
      storeRGB30FromARGB32PM<PixelOrderRGB>,
      storeRGB30FromRGB32<PixelOrderRGB>
    }, // Format_RGB30
    { true, true, QPixelLayout::BPP32, rbSwap_rgb30,
      convertA2RGB30PMToARGB32PM<PixelOrderRGB>,
      convertA2RGB30PMToRGBA64PM<PixelOrderRGB>,
      fetchA2RGB30PMToARGB32PM<PixelOrderRGB>,
      fetchA2RGB30PMToRGBA64PM<PixelOrderRGB>,
      storeA2RGB30PMFromARGB32PM<PixelOrderRGB>,
      storeRGB30FromRGB32<PixelOrderRGB>
    }, // Format_A2RGB30_Premultiplied
    { true, true, QPixelLayout::BPP8, nullptr,
      convertAlpha8ToRGB32, convertAlpha8ToRGB64,
      fetchAlpha8ToRGB32, fetchAlpha8ToRGB64,
      storeAlpha8FromARGB32PM, nullptr }, // Format_Alpha8
    { false, false, QPixelLayout::BPP8, nullptr,
      convertGrayscale8ToRGB32, convertGrayscale8ToRGB64,
      fetchGrayscale8ToRGB32, fetchGrayscale8ToRGB64,
      storeGrayscale8FromARGB32PM, storeGrayscale8FromRGB32 }, // Format_Grayscale8
    { false, false, QPixelLayout::BPP64, nullptr,
      convertPassThrough, nullptr,
      fetchRGB64ToRGB32, fetchPassThrough64,
      storeRGB64FromRGB32, storeRGB64FromRGB32 }, // Format_RGBX64
    { true, false, QPixelLayout::BPP64, nullptr,
      convertARGB32ToARGB32PM, nullptr,
      fetchRGBA64ToARGB32PM, fetchRGBA64ToRGBA64PM,
      storeRGBA64FromARGB32PM, storeRGB64FromRGB32 }, // Format_RGBA64
    { true, true, QPixelLayout::BPP64, nullptr,
      convertPassThrough, nullptr,
      fetchRGB64ToRGB32, fetchPassThrough64,
      storeRGB64FromRGB32, storeRGB64FromRGB32 }, // Format_RGBA64_Premultiplied
    { false, false, QPixelLayout::BPP16, nullptr,
      convertGrayscale16ToRGB32, convertGrayscale16ToRGBA64,
      fetchGrayscale16ToRGB32, fetchGrayscale16ToRGBA64,
      storeGrayscale16FromARGB32PM, storeGrayscale16FromRGB32 }, // Format_Grayscale16
    pixelLayoutRGB<QImage::Format_BGR888>(),
};

// Compile-time check that the table has exactly QImage::NImageFormats entries.
Q_STATIC_ASSERT(sizeof(qPixelLayouts) / sizeof(*qPixelLayouts) == QImage::NImageFormats);
1544
convertFromRgb64(uint * dest,const QRgba64 * src,int length)1545 static void QT_FASTCALL convertFromRgb64(uint *dest, const QRgba64 *src, int length)
1546 {
1547 for (int i = 0; i < length; ++i) {
1548 dest[i] = toArgb32(src[i]);
1549 }
1550 }
1551
1552 template<QImage::Format format>
storeGenericFromRGBA64PM(uchar * dest,const QRgba64 * src,int index,int count,const QVector<QRgb> * clut,QDitherInfo * dither)1553 static void QT_FASTCALL storeGenericFromRGBA64PM(uchar *dest, const QRgba64 *src, int index, int count,
1554 const QVector<QRgb> *clut, QDitherInfo *dither)
1555 {
1556 uint buffer[BufferSize];
1557 convertFromRgb64(buffer, src, count);
1558 qPixelLayouts[format].storeFromARGB32PM(dest, buffer, index, count, clut, dither);
1559 }
1560
storeARGB32FromRGBA64PM(uchar * dest,const QRgba64 * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)1561 static void QT_FASTCALL storeARGB32FromRGBA64PM(uchar *dest, const QRgba64 *src, int index, int count,
1562 const QVector<QRgb> *, QDitherInfo *)
1563 {
1564 uint *d = (uint*)dest + index;
1565 for (int i = 0; i < count; ++i)
1566 d[i] = toArgb32(src[i].unpremultiplied());
1567 }
1568
storeRGBA8888FromRGBA64PM(uchar * dest,const QRgba64 * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)1569 static void QT_FASTCALL storeRGBA8888FromRGBA64PM(uchar *dest, const QRgba64 *src, int index, int count,
1570 const QVector<QRgb> *, QDitherInfo *)
1571 {
1572 uint *d = (uint*)dest + index;
1573 for (int i = 0; i < count; ++i)
1574 d[i] = toRgba8888(src[i].unpremultiplied());
1575 }
1576
1577 template<QtPixelOrder PixelOrder>
storeRGB30FromRGBA64PM(uchar * dest,const QRgba64 * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)1578 static void QT_FASTCALL storeRGB30FromRGBA64PM(uchar *dest, const QRgba64 *src, int index, int count,
1579 const QVector<QRgb> *, QDitherInfo *)
1580 {
1581 uint *d = (uint*)dest + index;
1582 #ifdef __SSE2__
1583 qConvertRGBA64PMToA2RGB30PM_sse2<PixelOrder>(d, src, count);
1584 #else
1585 for (int i = 0; i < count; ++i)
1586 d[i] = qConvertRgb64ToRgb30<PixelOrder>(src[i]);
1587 #endif
1588 }
1589
storeRGBX64FromRGBA64PM(uchar * dest,const QRgba64 * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)1590 static void QT_FASTCALL storeRGBX64FromRGBA64PM(uchar *dest, const QRgba64 *src, int index, int count,
1591 const QVector<QRgb> *, QDitherInfo *)
1592 {
1593 QRgba64 *d = reinterpret_cast<QRgba64*>(dest) + index;
1594 for (int i = 0; i < count; ++i) {
1595 d[i] = src[i].unpremultiplied();
1596 d[i].setAlpha(65535);
1597 }
1598 }
1599
storeRGBA64FromRGBA64PM(uchar * dest,const QRgba64 * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)1600 static void QT_FASTCALL storeRGBA64FromRGBA64PM(uchar *dest, const QRgba64 *src, int index, int count,
1601 const QVector<QRgb> *, QDitherInfo *)
1602 {
1603 QRgba64 *d = reinterpret_cast<QRgba64*>(dest) + index;
1604 for (int i = 0; i < count; ++i)
1605 d[i] = src[i].unpremultiplied();
1606 }
1607
storeRGBA64PMFromRGBA64PM(uchar * dest,const QRgba64 * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)1608 static void QT_FASTCALL storeRGBA64PMFromRGBA64PM(uchar *dest, const QRgba64 *src, int index, int count,
1609 const QVector<QRgb> *, QDitherInfo *)
1610 {
1611 QRgba64 *d = reinterpret_cast<QRgba64*>(dest) + index;
1612 if (d != src)
1613 memcpy(d, src, count * sizeof(QRgba64));
1614 }
1615
storeGray16FromRGBA64PM(uchar * dest,const QRgba64 * src,int index,int count,const QVector<QRgb> *,QDitherInfo *)1616 static void QT_FASTCALL storeGray16FromRGBA64PM(uchar *dest, const QRgba64 *src, int index, int count,
1617 const QVector<QRgb> *, QDitherInfo *)
1618 {
1619 quint16 *d = reinterpret_cast<quint16*>(dest) + index;
1620 for (int i = 0; i < count; ++i) {
1621 QRgba64 s = src[i].unpremultiplied();
1622 d[i] = qGray(s.red(), s.green(), s.blue());
1623 }
1624 }
1625
// Store routines that take QRgba64 source pixels, one entry per image format.
// destStore64() looks the routine up by rasterBuffer->format. The per-entry
// format names below follow the same order as the other per-format tables in
// this file; nullptr means no routine is registered for that format.
ConvertAndStorePixelsFunc64 qStoreFromRGBA64PM[QImage::NImageFormats] = {
    nullptr, // Format_Invalid
    nullptr, // Format_Mono
    nullptr, // Format_MonoLSB
    nullptr, // Format_Indexed8
    storeGenericFromRGBA64PM<QImage::Format_RGB32>, // Format_RGB32
    storeARGB32FromRGBA64PM, // Format_ARGB32
    storeGenericFromRGBA64PM<QImage::Format_ARGB32_Premultiplied>, // Format_ARGB32_Premultiplied
    storeGenericFromRGBA64PM<QImage::Format_RGB16>, // Format_RGB16
    storeGenericFromRGBA64PM<QImage::Format_ARGB8565_Premultiplied>, // Format_ARGB8565_Premultiplied
    storeGenericFromRGBA64PM<QImage::Format_RGB666>, // Format_RGB666
    storeGenericFromRGBA64PM<QImage::Format_ARGB6666_Premultiplied>, // Format_ARGB6666_Premultiplied
    storeGenericFromRGBA64PM<QImage::Format_RGB555>, // Format_RGB555
    storeGenericFromRGBA64PM<QImage::Format_ARGB8555_Premultiplied>, // Format_ARGB8555_Premultiplied
    storeGenericFromRGBA64PM<QImage::Format_RGB888>, // Format_RGB888
    storeGenericFromRGBA64PM<QImage::Format_RGB444>, // Format_RGB444
    storeGenericFromRGBA64PM<QImage::Format_ARGB4444_Premultiplied>, // Format_ARGB4444_Premultiplied
    storeGenericFromRGBA64PM<QImage::Format_RGBX8888>, // Format_RGBX8888
    storeRGBA8888FromRGBA64PM, // Format_RGBA8888
    storeGenericFromRGBA64PM<QImage::Format_RGBA8888_Premultiplied>, // Format_RGBA8888_Premultiplied
    storeRGB30FromRGBA64PM<PixelOrderBGR>, // Format_BGR30
    storeRGB30FromRGBA64PM<PixelOrderBGR>, // Format_A2BGR30_Premultiplied
    storeRGB30FromRGBA64PM<PixelOrderRGB>, // Format_RGB30
    storeRGB30FromRGBA64PM<PixelOrderRGB>, // Format_A2RGB30_Premultiplied
    storeGenericFromRGBA64PM<QImage::Format_Alpha8>, // Format_Alpha8
    storeGenericFromRGBA64PM<QImage::Format_Grayscale8>, // Format_Grayscale8
    storeRGBX64FromRGBA64PM, // Format_RGBX64
    storeRGBA64FromRGBA64PM, // Format_RGBA64
    storeRGBA64PMFromRGBA64PM, // Format_RGBA64_Premultiplied
    storeGray16FromRGBA64PM, // Format_Grayscale16
    storeGenericFromRGBA64PM<QImage::Format_BGR888>, // Format_BGR888
};
1658
1659 /*
1660 Destination fetch. This is simple as we don't have to do bounds checks or
1661 transformations
1662 */
1663
destFetchMono(uint * buffer,QRasterBuffer * rasterBuffer,int x,int y,int length)1664 static uint * QT_FASTCALL destFetchMono(uint *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length)
1665 {
1666 uchar *Q_DECL_RESTRICT data = (uchar *)rasterBuffer->scanLine(y);
1667 uint *start = buffer;
1668 const uint *end = buffer + length;
1669 while (buffer < end) {
1670 *buffer = data[x>>3] & (0x80 >> (x & 7)) ? rasterBuffer->destColor1 : rasterBuffer->destColor0;
1671 ++buffer;
1672 ++x;
1673 }
1674 return start;
1675 }
1676
destFetchMonoLsb(uint * buffer,QRasterBuffer * rasterBuffer,int x,int y,int length)1677 static uint * QT_FASTCALL destFetchMonoLsb(uint *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length)
1678 {
1679 uchar *Q_DECL_RESTRICT data = (uchar *)rasterBuffer->scanLine(y);
1680 uint *start = buffer;
1681 const uint *end = buffer + length;
1682 while (buffer < end) {
1683 *buffer = data[x>>3] & (0x1 << (x & 7)) ? rasterBuffer->destColor1 : rasterBuffer->destColor0;
1684 ++buffer;
1685 ++x;
1686 }
1687 return start;
1688 }
1689
destFetchARGB32P(uint *,QRasterBuffer * rasterBuffer,int x,int y,int)1690 static uint * QT_FASTCALL destFetchARGB32P(uint *, QRasterBuffer *rasterBuffer, int x, int y, int)
1691 {
1692 return (uint *)rasterBuffer->scanLine(y) + x;
1693 }
1694
destFetchRGB16(uint * buffer,QRasterBuffer * rasterBuffer,int x,int y,int length)1695 static uint * QT_FASTCALL destFetchRGB16(uint *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length)
1696 {
1697 const ushort *Q_DECL_RESTRICT data = (const ushort *)rasterBuffer->scanLine(y) + x;
1698 for (int i = 0; i < length; ++i)
1699 buffer[i] = qConvertRgb16To32(data[i]);
1700 return buffer;
1701 }
1702
destFetch(uint * buffer,QRasterBuffer * rasterBuffer,int x,int y,int length)1703 static uint *QT_FASTCALL destFetch(uint *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length)
1704 {
1705 const QPixelLayout *layout = &qPixelLayouts[rasterBuffer->format];
1706 return const_cast<uint *>(layout->fetchToARGB32PM(buffer, rasterBuffer->scanLine(y), x, length, nullptr, nullptr));
1707 }
1708
/*
    Returns buffer as-is without reading anything from the raster buffer;
    the contents of buffer are left untouched.
*/
static uint *QT_FASTCALL destFetchUndefined(uint *buffer, QRasterBuffer *, int, int, int)
{
    return buffer;
}
1713
// Destination fetch routines producing 32-bit pixels, one entry per image
// format (named in the trailing comments). nullptr means no routine is
// registered for that format. Formats mapped to destFetchARGB32P get a pointer
// straight into the scan line instead of a converted copy.
static DestFetchProc destFetchProc[QImage::NImageFormats] =
{
    nullptr,            // Format_Invalid
    destFetchMono,      // Format_Mono,
    destFetchMonoLsb,   // Format_MonoLSB
    nullptr,            // Format_Indexed8
    destFetchARGB32P,   // Format_RGB32
    destFetch,          // Format_ARGB32,
    destFetchARGB32P,   // Format_ARGB32_Premultiplied
    destFetchRGB16,     // Format_RGB16
    destFetch,          // Format_ARGB8565_Premultiplied
    destFetch,          // Format_RGB666
    destFetch,          // Format_ARGB6666_Premultiplied
    destFetch,          // Format_RGB555
    destFetch,          // Format_ARGB8555_Premultiplied
    destFetch,          // Format_RGB888
    destFetch,          // Format_RGB444
    destFetch,          // Format_ARGB4444_Premultiplied
    destFetch,          // Format_RGBX8888
    destFetch,          // Format_RGBA8888
    destFetch,          // Format_RGBA8888_Premultiplied
    destFetch,          // Format_BGR30
    destFetch,          // Format_A2BGR30_Premultiplied
    destFetch,          // Format_RGB30
    destFetch,          // Format_A2RGB30_Premultiplied
    destFetch,          // Format_Alpha8
    destFetch,          // Format_Grayscale8
    destFetch,          // Format_RGBX64
    destFetch,          // Format_RGBA64
    destFetch,          // Format_RGBA64_Premultiplied
    destFetch,          // Format_Grayscale16
    destFetch,          // Format_BGR888
};
1747
1748 #if QT_CONFIG(raster_64bit)
destFetch64(QRgba64 * buffer,QRasterBuffer * rasterBuffer,int x,int y,int length)1749 static QRgba64 *QT_FASTCALL destFetch64(QRgba64 *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length)
1750 {
1751 const QPixelLayout *layout = &qPixelLayouts[rasterBuffer->format];
1752 return const_cast<QRgba64 *>(layout->fetchToRGBA64PM(buffer, rasterBuffer->scanLine(y), x, length, nullptr, nullptr));
1753 }
1754
destFetchRGB64(QRgba64 *,QRasterBuffer * rasterBuffer,int x,int y,int)1755 static QRgba64 * QT_FASTCALL destFetchRGB64(QRgba64 *, QRasterBuffer *rasterBuffer, int x, int y, int)
1756 {
1757 return (QRgba64 *)rasterBuffer->scanLine(y) + x;
1758 }
1759
/*
    Returns buffer as-is without reading anything from the raster buffer;
    the contents of buffer are left untouched.
*/
static QRgba64 * QT_FASTCALL destFetch64Undefined(QRgba64 *buffer, QRasterBuffer *, int, int, int)
{
    return buffer;
}
1764
// Destination fetch routines producing QRgba64 pixels, one entry per image
// format (named in the trailing comments). nullptr means no routine is
// registered for that format. Formats mapped to destFetchRGB64 get a pointer
// straight into the scan line instead of a converted copy.
static DestFetchProc64 destFetchProc64[QImage::NImageFormats] =
{
    nullptr,            // Format_Invalid
    nullptr,            // Format_Mono,
    nullptr,            // Format_MonoLSB
    nullptr,            // Format_Indexed8
    destFetch64,        // Format_RGB32
    destFetch64,        // Format_ARGB32,
    destFetch64,        // Format_ARGB32_Premultiplied
    destFetch64,        // Format_RGB16
    destFetch64,        // Format_ARGB8565_Premultiplied
    destFetch64,        // Format_RGB666
    destFetch64,        // Format_ARGB6666_Premultiplied
    destFetch64,        // Format_RGB555
    destFetch64,        // Format_ARGB8555_Premultiplied
    destFetch64,        // Format_RGB888
    destFetch64,        // Format_RGB444
    destFetch64,        // Format_ARGB4444_Premultiplied
    destFetch64,        // Format_RGBX8888
    destFetch64,        // Format_RGBA8888
    destFetch64,        // Format_RGBA8888_Premultiplied
    destFetch64,        // Format_BGR30
    destFetch64,        // Format_A2BGR30_Premultiplied
    destFetch64,        // Format_RGB30
    destFetch64,        // Format_A2RGB30_Premultiplied
    destFetch64,        // Format_Alpha8
    destFetch64,        // Format_Grayscale8
    destFetchRGB64,     // Format_RGBX64
    destFetch64,        // Format_RGBA64
    destFetchRGB64,     // Format_RGBA64_Premultiplied
    destFetch64,        // Format_Grayscale16
    destFetch64,        // Format_BGR888
};
1798 #endif
1799
1800 /*
1801 Returns the color in the mono destination color table
1802 that is the "nearest" to /color/.
1803 */
findNearestColor(QRgb color,QRasterBuffer * rbuf)1804 static inline QRgb findNearestColor(QRgb color, QRasterBuffer *rbuf)
1805 {
1806 QRgb color_0 = qPremultiply(rbuf->destColor0);
1807 QRgb color_1 = qPremultiply(rbuf->destColor1);
1808 color = qPremultiply(color);
1809
1810 int r = qRed(color);
1811 int g = qGreen(color);
1812 int b = qBlue(color);
1813 int rx, gx, bx;
1814 int dist_0, dist_1;
1815
1816 rx = r - qRed(color_0);
1817 gx = g - qGreen(color_0);
1818 bx = b - qBlue(color_0);
1819 dist_0 = rx*rx + gx*gx + bx*bx;
1820
1821 rx = r - qRed(color_1);
1822 gx = g - qGreen(color_1);
1823 bx = b - qBlue(color_1);
1824 dist_1 = rx*rx + gx*gx + bx*bx;
1825
1826 if (dist_0 < dist_1)
1827 return color_0;
1828 return color_1;
1829 }
1830
1831 /*
1832 Destination store.
1833 */
1834
destStoreMono(QRasterBuffer * rasterBuffer,int x,int y,const uint * buffer,int length)1835 static void QT_FASTCALL destStoreMono(QRasterBuffer *rasterBuffer, int x, int y, const uint *buffer, int length)
1836 {
1837 uchar *Q_DECL_RESTRICT data = (uchar *)rasterBuffer->scanLine(y);
1838 if (rasterBuffer->monoDestinationWithClut) {
1839 for (int i = 0; i < length; ++i) {
1840 if (buffer[i] == rasterBuffer->destColor0) {
1841 data[x >> 3] &= ~(0x80 >> (x & 7));
1842 } else if (buffer[i] == rasterBuffer->destColor1) {
1843 data[x >> 3] |= 0x80 >> (x & 7);
1844 } else if (findNearestColor(buffer[i], rasterBuffer) == rasterBuffer->destColor0) {
1845 data[x >> 3] &= ~(0x80 >> (x & 7));
1846 } else {
1847 data[x >> 3] |= 0x80 >> (x & 7);
1848 }
1849 ++x;
1850 }
1851 } else {
1852 for (int i = 0; i < length; ++i) {
1853 if (qGray(buffer[i]) < int(qt_bayer_matrix[y & 15][x & 15]))
1854 data[x >> 3] |= 0x80 >> (x & 7);
1855 else
1856 data[x >> 3] &= ~(0x80 >> (x & 7));
1857 ++x;
1858 }
1859 }
1860 }
1861
destStoreMonoLsb(QRasterBuffer * rasterBuffer,int x,int y,const uint * buffer,int length)1862 static void QT_FASTCALL destStoreMonoLsb(QRasterBuffer *rasterBuffer, int x, int y, const uint *buffer, int length)
1863 {
1864 uchar *Q_DECL_RESTRICT data = (uchar *)rasterBuffer->scanLine(y);
1865 if (rasterBuffer->monoDestinationWithClut) {
1866 for (int i = 0; i < length; ++i) {
1867 if (buffer[i] == rasterBuffer->destColor0) {
1868 data[x >> 3] &= ~(1 << (x & 7));
1869 } else if (buffer[i] == rasterBuffer->destColor1) {
1870 data[x >> 3] |= 1 << (x & 7);
1871 } else if (findNearestColor(buffer[i], rasterBuffer) == rasterBuffer->destColor0) {
1872 data[x >> 3] &= ~(1 << (x & 7));
1873 } else {
1874 data[x >> 3] |= 1 << (x & 7);
1875 }
1876 ++x;
1877 }
1878 } else {
1879 for (int i = 0; i < length; ++i) {
1880 if (qGray(buffer[i]) < int(qt_bayer_matrix[y & 15][x & 15]))
1881 data[x >> 3] |= 1 << (x & 7);
1882 else
1883 data[x >> 3] &= ~(1 << (x & 7));
1884 ++x;
1885 }
1886 }
1887 }
1888
destStoreRGB16(QRasterBuffer * rasterBuffer,int x,int y,const uint * buffer,int length)1889 static void QT_FASTCALL destStoreRGB16(QRasterBuffer *rasterBuffer, int x, int y, const uint *buffer, int length)
1890 {
1891 quint16 *data = (quint16*)rasterBuffer->scanLine(y) + x;
1892 for (int i = 0; i < length; ++i)
1893 data[i] = qConvertRgb32To16(buffer[i]);
1894 }
1895
destStore(QRasterBuffer * rasterBuffer,int x,int y,const uint * buffer,int length)1896 static void QT_FASTCALL destStore(QRasterBuffer *rasterBuffer, int x, int y, const uint *buffer, int length)
1897 {
1898 const QPixelLayout *layout = &qPixelLayouts[rasterBuffer->format];
1899 ConvertAndStorePixelsFunc store = layout->storeFromARGB32PM;
1900 if (!layout->premultiplied && !layout->hasAlphaChannel)
1901 store = layout->storeFromRGB32;
1902 uchar *dest = rasterBuffer->scanLine(y);
1903 store(dest, buffer, x, length, nullptr, nullptr);
1904 }
1905
// Destination store routines taking 32-bit pixels, one entry per image format
// (named in the trailing comments). nullptr means no routine is registered.
// NOTE(review): Format_RGB32 and Format_ARGB32_Premultiplied have no store
// routine, presumably because destFetchARGB32P hands out a pointer straight
// into the scan line so results are already in place - confirm with the code
// that consumes this table.
static DestStoreProc destStoreProc[QImage::NImageFormats] =
{
    nullptr,            // Format_Invalid
    destStoreMono,      // Format_Mono,
    destStoreMonoLsb,   // Format_MonoLSB
    nullptr,            // Format_Indexed8
    nullptr,            // Format_RGB32
    destStore,          // Format_ARGB32,
    nullptr,            // Format_ARGB32_Premultiplied
    destStoreRGB16,     // Format_RGB16
    destStore,          // Format_ARGB8565_Premultiplied
    destStore,          // Format_RGB666
    destStore,          // Format_ARGB6666_Premultiplied
    destStore,          // Format_RGB555
    destStore,          // Format_ARGB8555_Premultiplied
    destStore,          // Format_RGB888
    destStore,          // Format_RGB444
    destStore,          // Format_ARGB4444_Premultiplied
    destStore,          // Format_RGBX8888
    destStore,          // Format_RGBA8888
    destStore,          // Format_RGBA8888_Premultiplied
    destStore,          // Format_BGR30
    destStore,          // Format_A2BGR30_Premultiplied
    destStore,          // Format_RGB30
    destStore,          // Format_A2RGB30_Premultiplied
    destStore,          // Format_Alpha8
    destStore,          // Format_Grayscale8
    destStore,          // Format_RGBX64
    destStore,          // Format_RGBA64
    destStore,          // Format_RGBA64_Premultiplied
    destStore,          // Format_Grayscale16
    destStore,          // Format_BGR888
};
1939
1940 #if QT_CONFIG(raster_64bit)
destStore64(QRasterBuffer * rasterBuffer,int x,int y,const QRgba64 * buffer,int length)1941 static void QT_FASTCALL destStore64(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length)
1942 {
1943 auto store = qStoreFromRGBA64PM[rasterBuffer->format];
1944 uchar *dest = rasterBuffer->scanLine(y);
1945 store(dest, buffer, x, length, nullptr, nullptr);
1946 }
1947
destStore64RGBA64(QRasterBuffer * rasterBuffer,int x,int y,const QRgba64 * buffer,int length)1948 static void QT_FASTCALL destStore64RGBA64(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length)
1949 {
1950 QRgba64 *dest = reinterpret_cast<QRgba64*>(rasterBuffer->scanLine(y)) + x;
1951 for (int i = 0; i < length; ++i) {
1952 dest[i] = buffer[i].unpremultiplied();
1953 }
1954 }
1955
// Destination store routines taking QRgba64 pixels, one entry per image format
// (named in the trailing comments). nullptr means no routine is registered.
// NOTE(review): Format_RGBX64 and Format_RGBA64_Premultiplied have no store
// routine, presumably because destFetchRGB64 hands out a pointer straight into
// the scan line so results are already in place - confirm with the code that
// consumes this table.
static DestStoreProc64 destStoreProc64[QImage::NImageFormats] =
{
    nullptr,            // Format_Invalid
    nullptr,            // Format_Mono,
    nullptr,            // Format_MonoLSB
    nullptr,            // Format_Indexed8
    destStore64,        // Format_RGB32
    destStore64,        // Format_ARGB32,
    destStore64,        // Format_ARGB32_Premultiplied
    destStore64,        // Format_RGB16
    destStore64,        // Format_ARGB8565_Premultiplied
    destStore64,        // Format_RGB666
    destStore64,        // Format_ARGB6666_Premultiplied
    destStore64,        // Format_RGB555
    destStore64,        // Format_ARGB8555_Premultiplied
    destStore64,        // Format_RGB888
    destStore64,        // Format_RGB444
    destStore64,        // Format_ARGB4444_Premultiplied
    destStore64,        // Format_RGBX8888
    destStore64,        // Format_RGBA8888
    destStore64,        // Format_RGBA8888_Premultiplied
    destStore64,        // Format_BGR30
    destStore64,        // Format_A2BGR30_Premultiplied
    destStore64,        // Format_RGB30
    destStore64,        // Format_A2RGB30_Premultiplied
    destStore64,        // Format_Alpha8
    destStore64,        // Format_Grayscale8
    nullptr,            // Format_RGBX64
    destStore64RGBA64,  // Format_RGBA64
    nullptr,            // Format_RGBA64_Premultiplied
    destStore64,        // Format_Grayscale16
    destStore64,        // Format_BGR888
};
1989 #endif
1990
/*
  Source fetches

  This is a bit more complicated, as we need several fetch routines for every surface type.

  We need 6 fetch methods per surface type (one per TextureBlendType):
  untransformed (tiled and not tiled)
  transformed (tiled and not tiled)
  transformed bilinear (tiled and not tiled)

  We don't need bounds checks for untransformed, but we need them for the other ones.

  The generic implementation does pixel by pixel fetches.
*/
2005
// Variants of texture blending. NBlendTypes is not a variant itself: as the
// last enumerator its value equals the number of variants listed before it.
enum TextureBlendType {
    BlendUntransformed,
    BlendTiled,
    BlendTransformed,
    BlendTransformedTiled,
    BlendTransformedBilinear,
    BlendTransformedBilinearTiled,
    NBlendTypes
};
2015
fetchUntransformed(uint * buffer,const Operator *,const QSpanData * data,int y,int x,int length)2016 static const uint *QT_FASTCALL fetchUntransformed(uint *buffer, const Operator *,
2017 const QSpanData *data, int y, int x, int length)
2018 {
2019 const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
2020 return layout->fetchToARGB32PM(buffer, data->texture.scanLine(y), x, length, data->texture.colorTable, nullptr);
2021 }
2022
fetchUntransformedARGB32PM(uint *,const Operator *,const QSpanData * data,int y,int x,int)2023 static const uint *QT_FASTCALL fetchUntransformedARGB32PM(uint *, const Operator *,
2024 const QSpanData *data, int y, int x, int)
2025 {
2026 const uchar *scanLine = data->texture.scanLine(y);
2027 return reinterpret_cast<const uint *>(scanLine) + x;
2028 }
2029
fetchUntransformedRGB16(uint * buffer,const Operator *,const QSpanData * data,int y,int x,int length)2030 static const uint *QT_FASTCALL fetchUntransformedRGB16(uint *buffer, const Operator *,
2031 const QSpanData *data, int y, int x,
2032 int length)
2033 {
2034 const quint16 *scanLine = (const quint16 *)data->texture.scanLine(y) + x;
2035 for (int i = 0; i < length; ++i)
2036 buffer[i] = qConvertRgb16To32(scanLine[i]);
2037 return buffer;
2038 }
2039
2040 #if QT_CONFIG(raster_64bit)
fetchUntransformed64(QRgba64 * buffer,const Operator *,const QSpanData * data,int y,int x,int length)2041 static const QRgba64 *QT_FASTCALL fetchUntransformed64(QRgba64 *buffer, const Operator *,
2042 const QSpanData *data, int y, int x, int length)
2043 {
2044 const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
2045 return layout->fetchToRGBA64PM(buffer, data->texture.scanLine(y), x, length, data->texture.colorTable, nullptr);
2046 }
2047
fetchUntransformedRGBA64PM(QRgba64 *,const Operator *,const QSpanData * data,int y,int x,int)2048 static const QRgba64 *QT_FASTCALL fetchUntransformedRGBA64PM(QRgba64 *, const Operator *,
2049 const QSpanData *data, int y, int x, int)
2050 {
2051 const uchar *scanLine = data->texture.scanLine(y);
2052 return reinterpret_cast<const QRgba64 *>(scanLine) + x;
2053 }
2054 #endif
2055
2056 template<TextureBlendType blendType>
fetchTransformed_pixelBounds(int max,int l1,int l2,int & v)2057 inline void fetchTransformed_pixelBounds(int max, int l1, int l2, int &v)
2058 {
2059 Q_STATIC_ASSERT(blendType == BlendTransformed || blendType == BlendTransformedTiled);
2060 if (blendType == BlendTransformedTiled) {
2061 if (v < 0 || v >= max) {
2062 v %= max;
2063 if (v < 0) v += max;
2064 }
2065 } else {
2066 v = qBound(l1, v, l2);
2067 }
2068 }
2069
canUseFastMatrixPath(const qreal cx,const qreal cy,const qsizetype length,const QSpanData * data)2070 static inline bool canUseFastMatrixPath(const qreal cx, const qreal cy, const qsizetype length, const QSpanData *data)
2071 {
2072 if (Q_UNLIKELY(!data->fast_matrix))
2073 return false;
2074
2075 qreal fx = (data->m21 * cy + data->m11 * cx + data->dx) * fixed_scale;
2076 qreal fy = (data->m22 * cy + data->m12 * cx + data->dy) * fixed_scale;
2077 qreal minc = std::min(fx, fy);
2078 qreal maxc = std::max(fx, fy);
2079 fx += std::trunc(data->m11 * fixed_scale) * length;
2080 fy += std::trunc(data->m12 * fixed_scale) * length;
2081 minc = std::min(minc, std::min(fx, fy));
2082 maxc = std::max(maxc, std::max(fx, fy));
2083
2084 return minc >= std::numeric_limits<int>::min() && maxc <= std::numeric_limits<int>::max();
2085 }
2086
// Fetches `length` source pixels for the destination span starting at (x, y),
// sampling one source pixel per destination pixel (no interpolation), and
// stores them in `buffer` without format conversion.
//
// Template parameters:
//   blendType - BlendTransformed (coordinates clamped) or
//               BlendTransformedTiled (coordinates wrapped).
//   bpp       - bits-per-pixel class of the source; BPPNone selects the
//               fetch function at run time from the layout.
//   T         - element type written to `buffer`.
//
// When canUseFastMatrixPath() allows it, coordinates are stepped as integers
// whose integer part is obtained with >> 16; otherwise a floating-point path
// with a per-pixel divide by the w component is used.
template<TextureBlendType blendType, QPixelLayout::BPP bpp, typename T>
static void QT_FASTCALL fetchTransformed_fetcher(T *buffer, const QSpanData *data,
                                                 int y, int x, int length)
{
    Q_STATIC_ASSERT(blendType == BlendTransformed || blendType == BlendTransformedTiled);
    const QTextureData &image = data->texture;

    // Sample at the centre of the destination pixel.
    const qreal cx = x + qreal(0.5);
    const qreal cy = y + qreal(0.5);

    // Sub-32-bit sources written to a uint-sized buffer go through a fetch
    // function; everything else is read directly as T from the scan line.
    constexpr bool useFetch = (bpp < QPixelLayout::BPP32) && sizeof(T) == sizeof(uint);
    const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
    if (!useFetch)
        Q_ASSERT(layout->bpp == bpp);
    // When templated 'fetch' should be inlined at compile time:
    const FetchPixelFunc fetch = (bpp == QPixelLayout::BPPNone) ? qFetchPixel[layout->bpp] : FetchPixelFunc(fetchPixel<bpp>);

    if (canUseFastMatrixPath(cx, cy, length, data)) {
        // The increment per x in the scanline
        int fdx = (int)(data->m11 * fixed_scale);
        int fdy = (int)(data->m12 * fixed_scale);

        int fx = int((data->m21 * cy
                      + data->m11 * cx + data->dx) * fixed_scale);
        int fy = int((data->m22 * cy
                      + data->m12 * cx + data->dy) * fixed_scale);

        if (fdy == 0) { // simple scale, no rotation or shear
            // The source row is constant for the whole span.
            int py = (fy >> 16);
            fetchTransformed_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, py);
            const uchar *src = image.scanLine(py);

            int i = 0;
            if (blendType == BlendTransformed) {
                // fastLen limits the unchecked loop below to the steps taken
                // before fx reaches the last (fdx > 0) or first (fdx < 0)
                // valid column.
                int fastLen = length;
                if (fdx > 0)
                    fastLen = qMin(fastLen, int((qint64(image.x2 - 1) * fixed_scale - fx) / fdx));
                else if (fdx < 0)
                    fastLen = qMin(fastLen, int((qint64(image.x1) * fixed_scale - fx) / fdx));

                // Leading pixels whose coordinate had to be clamped; stop as
                // soon as clamping leaves the coordinate unchanged.
                for (; i < fastLen; ++i) {
                    int x1 = (fx >> 16);
                    int x2 = x1;
                    fetchTransformed_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1);
                    if (x1 == x2)
                        break;
                    if (useFetch)
                        buffer[i] = fetch(src, x1);
                    else
                        buffer[i] = reinterpret_cast<const T*>(src)[x1];
                    fx += fdx;
                }

                // Middle part: no bounds checks.
                for (; i < fastLen; ++i) {
                    int px = (fx >> 16);
                    if (useFetch)
                        buffer[i] = fetch(src, px);
                    else
                        buffer[i] = reinterpret_cast<const T*>(src)[px];
                    fx += fdx;
                }
            }

            // Remaining pixels (all of them in the tiled case), bounds-checked.
            for (; i < length; ++i) {
                int px = (fx >> 16);
                fetchTransformed_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, px);
                if (useFetch)
                    buffer[i] = fetch(src, px);
                else
                    buffer[i] = reinterpret_cast<const T*>(src)[px];
                fx += fdx;
            }
        } else { // rotation or shear
            int i = 0;
            if (blendType == BlendTransformed) {
                // As above, but limited by both the x and the y extents.
                int fastLen = length;
                if (fdx > 0)
                    fastLen = qMin(fastLen, int((qint64(image.x2 - 1) * fixed_scale - fx) / fdx));
                else if (fdx < 0)
                    fastLen = qMin(fastLen, int((qint64(image.x1) * fixed_scale - fx) / fdx));
                if (fdy > 0)
                    fastLen = qMin(fastLen, int((qint64(image.y2 - 1) * fixed_scale - fy) / fdy));
                else if (fdy < 0)
                    fastLen = qMin(fastLen, int((qint64(image.y1) * fixed_scale - fy) / fdy));

                // Leading pixels where at least one coordinate was clamped.
                for (; i < fastLen; ++i) {
                    int x1 = (fx >> 16);
                    int y1 = (fy >> 16);
                    int x2 = x1;
                    int y2 = y1;
                    fetchTransformed_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1);
                    fetchTransformed_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1);
                    if (x1 == x2 && y1 == y2)
                        break;
                    if (useFetch)
                        buffer[i] = fetch(image.scanLine(y1), x1);
                    else
                        buffer[i] = reinterpret_cast<const T*>(image.scanLine(y1))[x1];
                    fx += fdx;
                    fy += fdy;
                }

                // Middle part: no bounds checks.
                for (; i < fastLen; ++i) {
                    int px = (fx >> 16);
                    int py = (fy >> 16);
                    if (useFetch)
                        buffer[i] = fetch(image.scanLine(py), px);
                    else
                        buffer[i] = reinterpret_cast<const T*>(image.scanLine(py))[px];
                    fx += fdx;
                    fy += fdy;
                }
            }

            // Remaining pixels (all of them in the tiled case), bounds-checked.
            for (; i < length; ++i) {
                int px = (fx >> 16);
                int py = (fy >> 16);
                fetchTransformed_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, px);
                fetchTransformed_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, py);
                if (useFetch)
                    buffer[i] = fetch(image.scanLine(py), px);
                else
                    buffer[i] = reinterpret_cast<const T*>(image.scanLine(py))[px];
                fx += fdx;
                fy += fdy;
            }
        }
    } else {
        // Floating-point path: x, y and w are stepped per pixel and the
        // source position is (fx / fw, fy / fw).
        const qreal fdx = data->m11;
        const qreal fdy = data->m12;
        const qreal fdw = data->m13;

        qreal fx = data->m21 * cy + data->m11 * cx + data->dx;
        qreal fy = data->m22 * cy + data->m12 * cx + data->dy;
        qreal fw = data->m23 * cy + data->m13 * cx + data->m33;

        T *const end = buffer + length;
        T *b = buffer;
        while (b < end) {
            // A zero w is treated as 1 to avoid dividing by zero.
            const qreal iw = fw == 0 ? 1 : 1 / fw;
            const qreal tx = fx * iw;
            const qreal ty = fy * iw;
            int px = qFloor(tx);
            int py = qFloor(ty);

            fetchTransformed_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, py);
            fetchTransformed_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, px);
            if (useFetch)
                *b = fetch(image.scanLine(py), px);
            else
                *b = reinterpret_cast<const T*>(image.scanLine(py))[px];

            fx += fdx;
            fy += fdy;
            fw += fdw;
            //force increment to avoid /0
            if (!fw) {
                fw += fdw;
            }
            ++b;
        }
    }
}
2250
2251 template<TextureBlendType blendType, QPixelLayout::BPP bpp>
fetchTransformed(uint * buffer,const Operator *,const QSpanData * data,int y,int x,int length)2252 static const uint *QT_FASTCALL fetchTransformed(uint *buffer, const Operator *, const QSpanData *data,
2253 int y, int x, int length)
2254 {
2255 Q_STATIC_ASSERT(blendType == BlendTransformed || blendType == BlendTransformedTiled);
2256 const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
2257 fetchTransformed_fetcher<blendType, bpp, uint>(buffer, data, y, x, length);
2258 layout->convertToARGB32PM(buffer, length, data->texture.colorTable);
2259 return buffer;
2260 }
2261
2262 #if QT_CONFIG(raster_64bit)
2263 template<TextureBlendType blendType> /* either BlendTransformed or BlendTransformedTiled */
fetchTransformed64(QRgba64 * buffer,const Operator *,const QSpanData * data,int y,int x,int length)2264 static const QRgba64 *QT_FASTCALL fetchTransformed64(QRgba64 *buffer, const Operator *, const QSpanData *data,
2265 int y, int x, int length)
2266 {
2267 const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
2268 if (layout->bpp != QPixelLayout::BPP64) {
2269 uint buffer32[BufferSize];
2270 Q_ASSERT(length <= BufferSize);
2271 if (layout->bpp == QPixelLayout::BPP32)
2272 fetchTransformed_fetcher<blendType, QPixelLayout::BPP32, uint>(buffer32, data, y, x, length);
2273 else
2274 fetchTransformed_fetcher<blendType, QPixelLayout::BPPNone, uint>(buffer32, data, y, x, length);
2275 return layout->convertToRGBA64PM(buffer, buffer32, length, data->texture.colorTable, nullptr);
2276 }
2277
2278 fetchTransformed_fetcher<blendType, QPixelLayout::BPP64, QRgba64>(buffer, data, y, x, length);
2279 if (data->texture.format == QImage::Format_RGBA64)
2280 convertRGBA64ToRGBA64PM(buffer, length);
2281 return buffer;
2282 }
2283 #endif
2284
/** \internal
  Bilinear blend of four ARGB pixels (top-left, top-right, bottom-left,
  bottom-right) using 4-bit weights.
  distx and disty must be between 0 and 16.
*/
static inline uint interpolate_4_pixels_16(uint tl, uint tr, uint bl, uint br, uint distx, uint disty)
{
    // Per-corner weights; together they always add up to 16*16 = 256.
    //   br: distx * disty
    //   tr: distx * (16 - disty) = 16*distx - distx*disty
    //   bl: (16 - distx) * disty = 16*disty - distx*disty
    //   tl: (16 - distx) * (16 - disty) = 16*16 - 16*distx - 16*disty + distx*disty
    const uint weightBR = distx * disty;
    const uint weightTR = distx*16 - weightBR;
    const uint weightBL = disty*16 - weightBR;
    const uint weightTL = 16*16 - 16*distx - 16*disty + weightBR;

    const uint maskRB = 0x00ff00ff;
    const uint maskAG = 0xff00ff00;

    // Two channels are processed per multiplication: red/blue in place,
    // alpha/green shifted down by 8 so the products do not collide.
    const uint sumRB = (tl & maskRB) * weightTL
                     + (tr & maskRB) * weightTR
                     + (bl & maskRB) * weightBL
                     + (br & maskRB) * weightBR;
    const uint sumAG = ((tl & maskAG) >> 8) * weightTL
                     + ((tr & maskAG) >> 8) * weightTR
                     + ((bl & maskAG) >> 8) * weightBL
                     + ((br & maskAG) >> 8) * weightBR;

    return ((sumRB >> 8) & maskRB) | (sumAG & maskAG);
}
2304
2305 #if defined(__SSE2__)
// SSE2 counterpart of interpolate_4_pixels_16(): blends four pixels at once
// and stores the four results to b with an unaligned store.
//   tl, tr, bl, br - __m128i values, each holding four 32-bit pixels
//   distx, disty   - per-16-bit-lane weights (shifted left by 4 here, so they
//                    are expected in the same 0..16 range as the scalar version)
//   colorMask      - mask selecting the low byte of every 16-bit lane
//                    (the red/blue channels)
//   v_256          - all 16-bit lanes set to 256
//   b              - destination for the four blended pixels
// All arithmetic is done in 16-bit lanes; the per-corner weights are built
// the same way as in the scalar function.
#define interpolate_4_pixels_16_sse2(tl, tr, bl, br, distx, disty, colorMask, v_256, b) \
{ \
    const __m128i dxdy = _mm_mullo_epi16 (distx, disty); \
    const __m128i distx_ = _mm_slli_epi16(distx, 4); \
    const __m128i disty_ = _mm_slli_epi16(disty, 4); \
    const __m128i idxidy = _mm_add_epi16(dxdy, _mm_sub_epi16(v_256, _mm_add_epi16(distx_, disty_))); \
    const __m128i dxidy = _mm_sub_epi16(distx_, dxdy); \
    const __m128i idxdy = _mm_sub_epi16(disty_, dxdy); \
 \
    __m128i tlAG = _mm_srli_epi16(tl, 8); \
    __m128i tlRB = _mm_and_si128(tl, colorMask); \
    __m128i trAG = _mm_srli_epi16(tr, 8); \
    __m128i trRB = _mm_and_si128(tr, colorMask); \
    __m128i blAG = _mm_srli_epi16(bl, 8); \
    __m128i blRB = _mm_and_si128(bl, colorMask); \
    __m128i brAG = _mm_srli_epi16(br, 8); \
    __m128i brRB = _mm_and_si128(br, colorMask); \
 \
    tlAG = _mm_mullo_epi16(tlAG, idxidy); \
    tlRB = _mm_mullo_epi16(tlRB, idxidy); \
    trAG = _mm_mullo_epi16(trAG, dxidy); \
    trRB = _mm_mullo_epi16(trRB, dxidy); \
    blAG = _mm_mullo_epi16(blAG, idxdy); \
    blRB = _mm_mullo_epi16(blRB, idxdy); \
    brAG = _mm_mullo_epi16(brAG, dxdy); \
    brRB = _mm_mullo_epi16(brRB, dxdy); \
 \
    /* Add the values, and shift to only keep 8 significant bits per colors */ \
    __m128i rAG =_mm_add_epi16(_mm_add_epi16(tlAG, trAG), _mm_add_epi16(blAG, brAG)); \
    __m128i rRB =_mm_add_epi16(_mm_add_epi16(tlRB, trRB), _mm_add_epi16(blRB, brRB)); \
    rAG = _mm_andnot_si128(colorMask, rAG); \
    rRB = _mm_srli_epi16(rRB, 8); \
    _mm_storeu_si128((__m128i*)(b), _mm_or_si128(rAG, rRB)); \
}
2340 #endif
2341
2342 #if defined(__ARM_NEON__)
// NEON counterpart of interpolate_4_pixels_16(): blends four pixels at once
// and stores the four results to b.
//   tl, tr, bl, br - int16x8_t values, each holding four 32-bit pixels
//   distx, disty   - per-16-bit-lane weights
//   disty_         - disty already shifted left by 4; supplied by the caller
//                    (only distx is shifted inside the macro)
//   colorMask      - mask selecting the low byte of every 16-bit lane
//   invColorMask   - bitwise inverse of colorMask
//   v_256          - all 16-bit lanes set to 256
//   b              - destination for the four blended pixels
// The weighted corners are accumulated with multiply-accumulate in 16-bit
// lanes.
#define interpolate_4_pixels_16_neon(tl, tr, bl, br, distx, disty, disty_, colorMask, invColorMask, v_256, b) \
{ \
    const int16x8_t dxdy = vmulq_s16(distx, disty); \
    const int16x8_t distx_ = vshlq_n_s16(distx, 4); \
    const int16x8_t idxidy = vaddq_s16(dxdy, vsubq_s16(v_256, vaddq_s16(distx_, disty_))); \
    const int16x8_t dxidy = vsubq_s16(distx_, dxdy); \
    const int16x8_t idxdy = vsubq_s16(disty_, dxdy); \
 \
    int16x8_t tlAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(tl), 8)); \
    int16x8_t tlRB = vandq_s16(tl, colorMask); \
    int16x8_t trAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(tr), 8)); \
    int16x8_t trRB = vandq_s16(tr, colorMask); \
    int16x8_t blAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(bl), 8)); \
    int16x8_t blRB = vandq_s16(bl, colorMask); \
    int16x8_t brAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(br), 8)); \
    int16x8_t brRB = vandq_s16(br, colorMask); \
 \
    int16x8_t rAG = vmulq_s16(tlAG, idxidy); \
    int16x8_t rRB = vmulq_s16(tlRB, idxidy); \
    rAG = vmlaq_s16(rAG, trAG, dxidy); \
    rRB = vmlaq_s16(rRB, trRB, dxidy); \
    rAG = vmlaq_s16(rAG, blAG, idxdy); \
    rRB = vmlaq_s16(rRB, blRB, idxdy); \
    rAG = vmlaq_s16(rAG, brAG, dxdy); \
    rRB = vmlaq_s16(rRB, brRB, dxdy); \
 \
    rAG = vandq_s16(invColorMask, rAG); \
    rRB = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(rRB), 8)); \
    vst1q_s16((int16_t*)(b), vorrq_s16(rAG, rRB)); \
}
2373 #endif
2374
// Computes the pair of neighbouring source coordinates used for bilinear
// interpolation: on entry v1 holds the requested coordinate, on return v1 and
// v2 hold the two coordinates to sample. Only declared here; the behaviour is
// provided by the explicit specializations for BlendTransformedBilinearTiled
// (wraps using max) and BlendTransformedBilinear (clamps to [l1, l2]).
template<TextureBlendType blendType>
void fetchTransformedBilinear_pixelBounds(int max, int l1, int l2, int &v1, int &v2);
2377
2378 template<>
fetchTransformedBilinear_pixelBounds(int max,int,int,int & v1,int & v2)2379 inline void fetchTransformedBilinear_pixelBounds<BlendTransformedBilinearTiled>(int max, int, int, int &v1, int &v2)
2380 {
2381 v1 %= max;
2382 if (v1 < 0)
2383 v1 += max;
2384 v2 = v1 + 1;
2385 if (v2 == max)
2386 v2 = 0;
2387 Q_ASSERT(v1 >= 0 && v1 < max);
2388 Q_ASSERT(v2 >= 0 && v2 < max);
2389 }
2390
2391 template<>
fetchTransformedBilinear_pixelBounds(int,int l1,int l2,int & v1,int & v2)2392 inline void fetchTransformedBilinear_pixelBounds<BlendTransformedBilinear>(int, int l1, int l2, int &v1, int &v2)
2393 {
2394 if (v1 < l1)
2395 v2 = v1 = l1;
2396 else if (v1 >= l2)
2397 v2 = v1 = l2;
2398 else
2399 v2 = v1 + 1;
2400 Q_ASSERT(v1 >= l1 && v1 <= l2);
2401 Q_ASSERT(v2 >= l1 && v2 <= l2);
2402 }
2403
// Categories of transform handled by the fixed-point bilinear helpers.
// NOTE(review): the names appear to correspond to the
// fetchTransformedBilinearARGB32PM_*_helper functions
// (simple_scale, upscale, downscale, rotate, fast_rotate); the mapping itself
// lives in a table outside this part of the file - confirm there.
enum FastTransformTypes {
    SimpleScaleTransform,
    UpscaleTransform,
    DownscaleTransform,
    RotateTransform,
    FastRotateTransform,
    NFastTransformTypes     // number of transform types above; keep last
};
2412
2413 // Completes the partial interpolation stored in IntermediateBuffer.
2414 // by performing the x-axis interpolation and joining the RB and AG buffers.
intermediate_adder(uint * b,uint * end,const IntermediateBuffer & intermediate,int offset,int & fx,int fdx)2415 static void QT_FASTCALL intermediate_adder(uint *b, uint *end, const IntermediateBuffer &intermediate, int offset, int &fx, int fdx)
2416 {
2417 #if defined(QT_COMPILER_SUPPORTS_AVX2)
2418 extern void QT_FASTCALL intermediate_adder_avx2(uint *b, uint *end, const IntermediateBuffer &intermediate, int offset, int &fx, int fdx);
2419 if (qCpuHasFeature(ArchHaswell))
2420 return intermediate_adder_avx2(b, end, intermediate, offset, fx, fdx);
2421 #endif
2422
2423 // Switch to intermediate buffer coordinates
2424 fx -= offset * fixed_scale;
2425
2426 while (b < end) {
2427 const int x = (fx >> 16);
2428
2429 const uint distx = (fx & 0x0000ffff) >> 8;
2430 const uint idistx = 256 - distx;
2431 const uint rb = (intermediate.buffer_rb[x] * idistx + intermediate.buffer_rb[x + 1] * distx) & 0xff00ff00;
2432 const uint ag = (intermediate.buffer_ag[x] * idistx + intermediate.buffer_ag[x + 1] * distx) & 0xff00ff00;
2433 *b = (rb >> 8) | ag;
2434 b++;
2435 fx += fdx;
2436 }
2437 fx += offset * fixed_scale;
2438 }
2439
// Signature shared by the fetchTransformedBilinearARGB32PM_*_helper functions:
// fills [b, end) from image, starting at position (fx, fy) and stepping by
// (fdx, fdy) per pixel. fx and fy are passed by reference and are updated by
// the helpers as they advance.
typedef void (QT_FASTCALL *BilinearFastTransformHelper)(uint *b, uint *end, const QTextureData &image, int &fx, int &fy, int fdx, int fdy);
2441
// Bilinear fetch for a pure horizontal scale (fdy is unused).
// Works in two passes:
//   1. the two source rows are blended vertically, column by column, into an
//      IntermediateBuffer (red/blue and alpha/green kept separately);
//   2. intermediate_adder() performs the horizontal blend and writes the
//      final pixels to [b, end), advancing fx.
// fy is only read.
template<TextureBlendType blendType>
static void QT_FASTCALL fetchTransformedBilinearARGB32PM_simple_scale_helper(uint *b, uint *end, const QTextureData &image,
                                                                             int &fx, int &fy, int fdx, int /*fdy*/)
{
    // The pair of source rows is the same for the whole span.
    int y1 = (fy >> 16);
    int y2;
    fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
    const uint *s1 = (const uint *)image.scanLine(y1);
    const uint *s2 = (const uint *)image.scanLine(y2);

    // 8-bit vertical weights: disty for the lower row, idisty for the upper.
    const int disty = (fy & 0x0000ffff) >> 8;
    const int idisty = 256 - disty;
    const int length = end - b;

    // The intermediate buffer is generated in the positive direction
    const int adjust = (fdx < 0) ? fdx * length : 0;
    const int offset = (fx + adjust) >> 16;
    int x = offset;

    IntermediateBuffer intermediate;
    // count is the size used in the intermediate.buffer.
    int count = (qint64(length) * qAbs(fdx) + fixed_scale - 1) / fixed_scale + 2;
    // length is supposed to be <= BufferSize either because data->m11 < 1 or
    // data->m11 < 2, and any larger buffers split
    Q_ASSERT(count <= BufferSize + 2);
    int f = 0;          // next intermediate entry to fill
    int lim = count;    // end of the entries that may be filled without clamping x
    if (blendType == BlendTransformedBilinearTiled) {
        x %= image.width;
        if (x < 0) x += image.width;
    } else {
        lim = qMin(count, image.x2 - x);
        if (x < image.x1) {
            // Columns left of the valid area all repeat the first valid column.
            Q_ASSERT(x < image.x2);
            // Note: this local 'b' shadows the output pointer parameter.
            uint t = s1[image.x1];
            uint b = s2[image.x1];
            quint32 rb = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff;
            quint32 ag = ((((t>>8) & 0xff00ff) * idisty + ((b>>8) & 0xff00ff) * disty) >> 8) & 0xff00ff;
            do {
                intermediate.buffer_rb[f] = rb;
                intermediate.buffer_ag[f] = ag;
                f++;
                x++;
            } while (x < image.x1 && f < lim);
        }
    }

    // Vectorized vertical blend, four columns per iteration (non-tiled only).
    if (blendType != BlendTransformedBilinearTiled) {
#if defined(__SSE2__)
        const __m128i disty_ = _mm_set1_epi16(disty);
        const __m128i idisty_ = _mm_set1_epi16(idisty);
        const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);

        // Stop early enough that a full group of four columns is available.
        lim -= 3;
        for (; f < lim; x += 4, f += 4) {
            // Load 4 pixels from s1, and split the alpha-green and red-blue component
            __m128i top = _mm_loadu_si128((const __m128i*)((const uint *)(s1)+x));
            __m128i topAG = _mm_srli_epi16(top, 8);
            __m128i topRB = _mm_and_si128(top, colorMask);
            // Multiplies each color component by idisty
            topAG = _mm_mullo_epi16 (topAG, idisty_);
            topRB = _mm_mullo_epi16 (topRB, idisty_);

            // Same for the s2 vector
            __m128i bottom = _mm_loadu_si128((const __m128i*)((const uint *)(s2)+x));
            __m128i bottomAG = _mm_srli_epi16(bottom, 8);
            __m128i bottomRB = _mm_and_si128(bottom, colorMask);
            bottomAG = _mm_mullo_epi16 (bottomAG, disty_);
            bottomRB = _mm_mullo_epi16 (bottomRB, disty_);

            // Add the values, and shift to only keep 8 significant bits per colors
            __m128i rAG =_mm_add_epi16(topAG, bottomAG);
            rAG = _mm_srli_epi16(rAG, 8);
            _mm_storeu_si128((__m128i*)(&intermediate.buffer_ag[f]), rAG);
            __m128i rRB =_mm_add_epi16(topRB, bottomRB);
            rRB = _mm_srli_epi16(rRB, 8);
            _mm_storeu_si128((__m128i*)(&intermediate.buffer_rb[f]), rRB);
        }
#elif defined(__ARM_NEON__)
        const int16x8_t disty_ = vdupq_n_s16(disty);
        const int16x8_t idisty_ = vdupq_n_s16(idisty);
        const int16x8_t colorMask = vdupq_n_s16(0x00ff);

        // Stop early enough that a full group of four columns is available.
        lim -= 3;
        for (; f < lim; x += 4, f += 4) {
            // Load 4 pixels from s1, and split the alpha-green and red-blue component
            int16x8_t top = vld1q_s16((int16_t*)((const uint *)(s1)+x));
            int16x8_t topAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(top), 8));
            int16x8_t topRB = vandq_s16(top, colorMask);
            // Multiplies each color component by idisty
            topAG = vmulq_s16(topAG, idisty_);
            topRB = vmulq_s16(topRB, idisty_);

            // Same for the s2 vector
            int16x8_t bottom = vld1q_s16((int16_t*)((const uint *)(s2)+x));
            int16x8_t bottomAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(bottom), 8));
            int16x8_t bottomRB = vandq_s16(bottom, colorMask);
            bottomAG = vmulq_s16(bottomAG, disty_);
            bottomRB = vmulq_s16(bottomRB, disty_);

            // Add the values, and shift to only keep 8 significant bits per colors
            int16x8_t rAG = vaddq_s16(topAG, bottomAG);
            rAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(rAG), 8));
            vst1q_s16((int16_t*)(&intermediate.buffer_ag[f]), rAG);
            int16x8_t rRB = vaddq_s16(topRB, bottomRB);
            rRB = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(rRB), 8));
            vst1q_s16((int16_t*)(&intermediate.buffer_rb[f]), rRB);
        }
#endif
    }
    // Scalar pass for whatever is left: wraps x (tiled) or pins it to the
    // last valid column (non-tiled).
    for (; f < count; f++) { // Same as above but without simd
        if (blendType == BlendTransformedBilinearTiled) {
            if (x >= image.width) x -= image.width;
        } else {
            x = qMin(x, image.x2 - 1);
        }

        // Note: this local 'b' shadows the output pointer parameter.
        uint t = s1[x];
        uint b = s2[x];

        intermediate.buffer_rb[f] = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff;
        intermediate.buffer_ag[f] = ((((t>>8) & 0xff00ff) * idisty + ((b>>8) & 0xff00ff) * disty) >> 8) & 0xff00ff;
        x++;
    }

    // Now interpolate the values from the intermediate.buffer to get the final result.
    intermediate_adder(b, end, intermediate, offset, fx, fdx);
}
2570
2571 template<TextureBlendType blendType>
fetchTransformedBilinearARGB32PM_upscale_helper(uint * b,uint * end,const QTextureData & image,int & fx,int & fy,int fdx,int)2572 static void QT_FASTCALL fetchTransformedBilinearARGB32PM_upscale_helper(uint *b, uint *end, const QTextureData &image,
2573 int &fx, int &fy, int fdx, int /*fdy*/)
2574 {
2575 int y1 = (fy >> 16);
2576 int y2;
2577 fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
2578 const uint *s1 = (const uint *)image.scanLine(y1);
2579 const uint *s2 = (const uint *)image.scanLine(y2);
2580 const int disty = (fy & 0x0000ffff) >> 8;
2581
2582 if (blendType != BlendTransformedBilinearTiled) {
2583 const qint64 min_fx = qint64(image.x1) * fixed_scale;
2584 const qint64 max_fx = qint64(image.x2 - 1) * fixed_scale;
2585 while (b < end) {
2586 int x1 = (fx >> 16);
2587 int x2;
2588 fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
2589 if (x1 != x2)
2590 break;
2591 uint top = s1[x1];
2592 uint bot = s2[x1];
2593 *b = INTERPOLATE_PIXEL_256(top, 256 - disty, bot, disty);
2594 fx += fdx;
2595 ++b;
2596 }
2597 uint *boundedEnd = end;
2598 if (fdx > 0)
2599 boundedEnd = qMin(boundedEnd, b + (max_fx - fx) / fdx);
2600 else if (fdx < 0)
2601 boundedEnd = qMin(boundedEnd, b + (min_fx - fx) / fdx);
2602
2603 // A fast middle part without boundary checks
2604 while (b < boundedEnd) {
2605 int x = (fx >> 16);
2606 int distx = (fx & 0x0000ffff) >> 8;
2607 *b = interpolate_4_pixels(s1 + x, s2 + x, distx, disty);
2608 fx += fdx;
2609 ++b;
2610 }
2611 }
2612
2613 while (b < end) {
2614 int x1 = (fx >> 16);
2615 int x2;
2616 fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1 , x1, x2);
2617 uint tl = s1[x1];
2618 uint tr = s1[x2];
2619 uint bl = s2[x1];
2620 uint br = s2[x2];
2621 int distx = (fx & 0x0000ffff) >> 8;
2622 *b = interpolate_4_pixels(tl, tr, bl, br, distx, disty);
2623
2624 fx += fdx;
2625 ++b;
2626 }
2627 }
2628
// Bilinear fetch for a horizontal-only step (fdy is unused) that may use the
// cheaper 4-bit-weight interpolation. The two source rows are fixed for the
// whole span; fx is advanced by fdx per written pixel.
// In the non-tiled case the span is processed as:
//   1. leading pixels on a clamped column (vertical blend only),
//   2. a middle part without bounds checks, four pixels at a time when SSE2
//      or NEON is available, followed by a scalar loop,
//   3. the remaining pixels with per-pixel bounds handling.
// The tiled case uses step 3 for everything.
template<TextureBlendType blendType>
static void QT_FASTCALL fetchTransformedBilinearARGB32PM_downscale_helper(uint *b, uint *end, const QTextureData &image,
                                                                          int &fx, int &fy, int fdx, int /*fdy*/)
{
    int y1 = (fy >> 16);
    int y2;
    fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
    const uint *s1 = (const uint *)image.scanLine(y1);
    const uint *s2 = (const uint *)image.scanLine(y2);
    // Vertical weight with 8 bits of precision, and the same weight rounded
    // to 4 bits for the interpolate_4_pixels_16 variants.
    const int disty8 = (fy & 0x0000ffff) >> 8;
    const int disty4 = (disty8 + 0x08) >> 4;

    if (blendType != BlendTransformedBilinearTiled) {
        const qint64 min_fx = qint64(image.x1) * fixed_scale;
        const qint64 max_fx = qint64(image.x2 - 1) * fixed_scale;
        // Leading pixels on a clamped column: stop once two distinct columns
        // are available.
        while (b < end) {
            int x1 = (fx >> 16);
            int x2;
            fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
            if (x1 != x2)
                break;
            uint top = s1[x1];
            uint bot = s2[x1];
            *b = INTERPOLATE_PIXEL_256(top, 256 - disty8, bot, disty8);
            fx += fdx;
            ++b;
        }
        // Limit of the region in which fx stays between the first and the
        // last valid column.
        uint *boundedEnd = end;
        if (fdx > 0)
            boundedEnd = qMin(boundedEnd, b + (max_fx - fx) / fdx);
        else if (fdx < 0)
            boundedEnd = qMin(boundedEnd, b + (min_fx - fx) / fdx);
        // A fast middle part without boundary checks
#if defined(__SSE2__)
        const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
        const __m128i v_256 = _mm_set1_epi16(256);
        const __m128i v_disty = _mm_set1_epi16(disty4);
        const __m128i v_fdx = _mm_set1_epi32(fdx*4);
        const __m128i v_fx_r = _mm_set1_epi32(0x8);
        // Four consecutive fx positions, one per 32-bit lane.
        __m128i v_fx = _mm_setr_epi32(fx, fx + fdx, fx + fdx + fdx, fx + fdx + fdx + fdx);

        while (b < boundedEnd - 3) {
            // Extract the four integer column indices lane by lane.
            __m128i offset = _mm_srli_epi32(v_fx, 16);
            const int offset0 = _mm_cvtsi128_si32(offset); offset = _mm_srli_si128(offset, 4);
            const int offset1 = _mm_cvtsi128_si32(offset); offset = _mm_srli_si128(offset, 4);
            const int offset2 = _mm_cvtsi128_si32(offset); offset = _mm_srli_si128(offset, 4);
            const int offset3 = _mm_cvtsi128_si32(offset);
            const __m128i tl = _mm_setr_epi32(s1[offset0], s1[offset1], s1[offset2], s1[offset3]);
            const __m128i tr = _mm_setr_epi32(s1[offset0 + 1], s1[offset1 + 1], s1[offset2 + 1], s1[offset3 + 1]);
            const __m128i bl = _mm_setr_epi32(s2[offset0], s2[offset1], s2[offset2], s2[offset3]);
            const __m128i br = _mm_setr_epi32(s2[offset0 + 1], s2[offset1 + 1], s2[offset2 + 1], s2[offset3 + 1]);

            // Rounded 4-bit horizontal weight, duplicated into both 16-bit
            // halves of each 32-bit lane.
            __m128i v_distx = _mm_srli_epi16(v_fx, 8);
            v_distx = _mm_srli_epi16(_mm_add_epi32(v_distx, v_fx_r), 4);
            v_distx = _mm_shufflehi_epi16(v_distx, _MM_SHUFFLE(2,2,0,0));
            v_distx = _mm_shufflelo_epi16(v_distx, _MM_SHUFFLE(2,2,0,0));

            interpolate_4_pixels_16_sse2(tl, tr, bl, br, v_distx, v_disty, colorMask, v_256, b);
            b += 4;
            v_fx = _mm_add_epi32(v_fx, v_fdx);
        }
        // Lane 0 holds the position of the next unprocessed pixel.
        fx = _mm_cvtsi128_si32(v_fx);
#elif defined(__ARM_NEON__)
        const int16x8_t colorMask = vdupq_n_s16(0x00ff);
        const int16x8_t invColorMask = vmvnq_s16(colorMask);
        const int16x8_t v_256 = vdupq_n_s16(256);
        const int16x8_t v_disty = vdupq_n_s16(disty4);
        const int16x8_t v_disty_ = vshlq_n_s16(v_disty, 4);
        int32x4_t v_fdx = vdupq_n_s32(fdx*4);

        // Four consecutive fx positions, one per 32-bit lane; used only for
        // the weights, the scalar fx below is used for addressing.
        int32x4_t v_fx = vmovq_n_s32(fx);
        v_fx = vsetq_lane_s32(fx + fdx, v_fx, 1);
        v_fx = vsetq_lane_s32(fx + fdx * 2, v_fx, 2);
        v_fx = vsetq_lane_s32(fx + fdx * 3, v_fx, 3);

        const int32x4_t v_ffff_mask = vdupq_n_s32(0x0000ffff);
        const int32x4_t v_fx_r = vdupq_n_s32(0x0800);

        while (b < boundedEnd - 3) {
            uint32x4x2_t v_top, v_bot;

            // Each two-element lane load reads the pixel at x1 and the one
            // after it from a row, for one of the four output pixels.
            int x1 = (fx >> 16);
            fx += fdx;
            v_top = vld2q_lane_u32(s1 + x1, v_top, 0);
            v_bot = vld2q_lane_u32(s2 + x1, v_bot, 0);
            x1 = (fx >> 16);
            fx += fdx;
            v_top = vld2q_lane_u32(s1 + x1, v_top, 1);
            v_bot = vld2q_lane_u32(s2 + x1, v_bot, 1);
            x1 = (fx >> 16);
            fx += fdx;
            v_top = vld2q_lane_u32(s1 + x1, v_top, 2);
            v_bot = vld2q_lane_u32(s2 + x1, v_bot, 2);
            x1 = (fx >> 16);
            fx += fdx;
            v_top = vld2q_lane_u32(s1 + x1, v_top, 3);
            v_bot = vld2q_lane_u32(s2 + x1, v_bot, 3);

            // Rounded 4-bit horizontal weight, duplicated into both 16-bit
            // halves of each 32-bit lane.
            int32x4_t v_distx = vshrq_n_s32(vaddq_s32(vandq_s32(v_fx, v_ffff_mask), v_fx_r), 12);
            v_distx = vorrq_s32(v_distx, vshlq_n_s32(v_distx, 16));

            interpolate_4_pixels_16_neon(
                        vreinterpretq_s16_u32(v_top.val[0]), vreinterpretq_s16_u32(v_top.val[1]),
                        vreinterpretq_s16_u32(v_bot.val[0]), vreinterpretq_s16_u32(v_bot.val[1]),
                        vreinterpretq_s16_s32(v_distx), v_disty, v_disty_,
                        colorMask, invColorMask, v_256, b);
            b+=4;
            v_fx = vaddq_s32(v_fx, v_fdx);
        }
#endif
        // Scalar remainder of the unchecked middle part.
        while (b < boundedEnd) {
            int x = (fx >> 16);
            if (hasFastInterpolate4()) {
                int distx8 = (fx & 0x0000ffff) >> 8;
                *b = interpolate_4_pixels(s1 + x, s2 + x, distx8, disty8);
            } else {
                int distx4 = ((fx & 0x0000ffff) + 0x0800) >> 12;
                *b = interpolate_4_pixels_16(s1[x], s1[x + 1], s2[x], s2[x + 1], distx4, disty4);
            }
            fx += fdx;
            ++b;
        }
    }

    // Everything that is left, with per-pixel bounds handling.
    while (b < end) {
        int x1 = (fx >> 16);
        int x2;
        fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
        uint tl = s1[x1];
        uint tr = s1[x2];
        uint bl = s2[x1];
        uint br = s2[x2];
        if (hasFastInterpolate4()) {
            int distx8 = (fx & 0x0000ffff) >> 8;
            *b = interpolate_4_pixels(tl, tr, bl, br, distx8, disty8);
        } else {
            int distx4 = ((fx & 0x0000ffff) + 0x0800) >> 12;
            *b = interpolate_4_pixels_16(tl, tr, bl, br, distx4, disty4);
        }
        fx += fdx;
        ++b;
    }
}
2772
2773 template<TextureBlendType blendType>
fetchTransformedBilinearARGB32PM_rotate_helper(uint * b,uint * end,const QTextureData & image,int & fx,int & fy,int fdx,int fdy)2774 static void QT_FASTCALL fetchTransformedBilinearARGB32PM_rotate_helper(uint *b, uint *end, const QTextureData &image,
2775 int &fx, int &fy, int fdx, int fdy)
2776 {
2777 // if we are zooming more than 8 times, we use 8bit precision for the position.
2778 while (b < end) {
2779 int x1 = (fx >> 16);
2780 int x2;
2781 int y1 = (fy >> 16);
2782 int y2;
2783
2784 fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
2785 fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
2786
2787 const uint *s1 = (const uint *)image.scanLine(y1);
2788 const uint *s2 = (const uint *)image.scanLine(y2);
2789
2790 uint tl = s1[x1];
2791 uint tr = s1[x2];
2792 uint bl = s2[x1];
2793 uint br = s2[x2];
2794
2795 int distx = (fx & 0x0000ffff) >> 8;
2796 int disty = (fy & 0x0000ffff) >> 8;
2797
2798 *b = interpolate_4_pixels(tl, tr, bl, br, distx, disty);
2799
2800 fx += fdx;
2801 fy += fdy;
2802 ++b;
2803 }
2804 }
2805
// Bilinear fetch of 32-bit pixels along an arbitrary straight path through the texture,
// using reduced (4-bit) blend weights unless hasFastInterpolate4() is true.
//
// fx/fy are 16.16 fixed-point source coordinates advanced by fdx/fdy per output pixel in
// [b, end); both are taken by reference and hold the position after the last pixel on return.
//
// For the non-tiled blend type the work is split into three phases:
//   1. a leading part where at least one coordinate is still clamped by the bounds helper,
//   2. a middle part (up to boundedEnd) where x, x + 1, y and y + 1 are all known to be in
//      range, so no bounds handling is needed (vectorized with SSE2 or NEON when available),
//   3. a trailing generic loop with full bounds handling.
// The tiled blend type only uses the generic loop.
template<TextureBlendType blendType>
static void QT_FASTCALL fetchTransformedBilinearARGB32PM_fast_rotate_helper(uint *b, uint *end, const QTextureData &image,
                                                                             int &fx, int &fy, int fdx, int fdy)
{
    //we are zooming less than 8x, use 4bit precision
    if (blendType != BlendTransformedBilinearTiled) {
        // Limits of the sampled area in the same 16.16 fixed-point scale as fx/fy.
        const qint64 min_fx = qint64(image.x1) * fixed_scale;
        const qint64 max_fx = qint64(image.x2 - 1) * fixed_scale;
        const qint64 min_fy = qint64(image.y1) * fixed_scale;
        const qint64 max_fy = qint64(image.y2 - 1) * fixed_scale;
        // first handle the possibly bounded part in the beginning
        while (b < end) {
            int x1 = (fx >> 16);
            int x2;
            int y1 = (fy >> 16);
            int y2;
            fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
            fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
            // Both neighbours differ from the base pixel in x and in y: stop and
            // hand over to the unchecked middle part.
            if (x1 != x2 && y1 != y2)
                break;
            const uint *s1 = (const uint *)image.scanLine(y1);
            const uint *s2 = (const uint *)image.scanLine(y2);
            uint tl = s1[x1];
            uint tr = s1[x2];
            uint bl = s2[x1];
            uint br = s2[x2];
            if (hasFastInterpolate4()) {
                // 8-bit weights
                int distx = (fx & 0x0000ffff) >> 8;
                int disty = (fy & 0x0000ffff) >> 8;
                *b = interpolate_4_pixels(tl, tr, bl, br, distx, disty);
            } else {
                // 4-bit weights, rounded to nearest (result is in 0..16)
                int distx = ((fx & 0x0000ffff) + 0x0800) >> 12;
                int disty = ((fy & 0x0000ffff) + 0x0800) >> 12;
                *b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
            }
            fx += fdx;
            fy += fdy;
            ++b;
        }
        // Number of steps that can be taken from the current position before either
        // coordinate reaches the limit it is moving towards, expressed as an end pointer.
        uint *boundedEnd = end;
        if (fdx > 0)
            boundedEnd = qMin(boundedEnd, b + (max_fx - fx) / fdx);
        else if (fdx < 0)
            boundedEnd = qMin(boundedEnd, b + (min_fx - fx) / fdx);
        if (fdy > 0)
            boundedEnd = qMin(boundedEnd, b + (max_fy - fy) / fdy);
        else if (fdy < 0)
            boundedEnd = qMin(boundedEnd, b + (min_fy - fy) / fdy);

        // until boundedEnd we can now have a fast middle part without boundary checks
#if defined(__SSE2__)
        const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
        const __m128i v_256 = _mm_set1_epi16(256);
        // Four pixels are produced per iteration, so the vector step is 4 * fdx / 4 * fdy.
        const __m128i v_fdx = _mm_set1_epi32(fdx*4);
        const __m128i v_fdy = _mm_set1_epi32(fdy*4);
        // Rounding term added before the weights are reduced from 8 to 4 bits.
        const __m128i v_fxy_r = _mm_set1_epi32(0x8);
        // Coordinates of four consecutive output pixels.
        __m128i v_fx = _mm_setr_epi32(fx, fx + fdx, fx + fdx + fdx, fx + fdx + fdx + fdx);
        __m128i v_fy = _mm_setr_epi32(fy, fy + fdy, fy + fdy + fdy, fy + fdy + fdy + fdy);

        const uchar *textureData = image.imageData;
        const qsizetype bytesPerLine = image.bytesPerLine;
        // Row stride in uint units, broadcast into the four low 16-bit lanes.
        // NOTE(review): only the low 16 bits of bytesPerLine/4 are used here, so this
        // presumably relies on rows being shorter than 32768 pixels - confirm.
        const __m128i vbpl = _mm_shufflelo_epi16(_mm_cvtsi32_si128(bytesPerLine/4), _MM_SHUFFLE(0, 0, 0, 0));

        while (b < boundedEnd - 3) {
            // Integer y of the four pixels packed down to 16 bits each.
            const __m128i vy = _mm_packs_epi32(_mm_srli_epi32(v_fy, 16), _mm_setzero_si128());
            // 4x16bit * 4x16bit -> 4x32bit
            __m128i offset = _mm_unpacklo_epi16(_mm_mullo_epi16(vy, vbpl), _mm_mulhi_epi16(vy, vbpl));
            // offset = y * stride + x, i.e. the index of the top-left pixel counted in uints
            offset = _mm_add_epi32(offset, _mm_srli_epi32(v_fx, 16));
            // Extract the four offsets one lane at a time.
            const int offset0 = _mm_cvtsi128_si32(offset); offset = _mm_srli_si128(offset, 4);
            const int offset1 = _mm_cvtsi128_si32(offset); offset = _mm_srli_si128(offset, 4);
            const int offset2 = _mm_cvtsi128_si32(offset); offset = _mm_srli_si128(offset, 4);
            const int offset3 = _mm_cvtsi128_si32(offset);
            // Gather the 2x2 neighbourhood of each of the four pixels; bottomData is one row below topData.
            const uint *topData = (const uint *)(textureData);
            const __m128i tl = _mm_setr_epi32(topData[offset0], topData[offset1], topData[offset2], topData[offset3]);
            const __m128i tr = _mm_setr_epi32(topData[offset0 + 1], topData[offset1 + 1], topData[offset2 + 1], topData[offset3 + 1]);
            const uint *bottomData = (const uint *)(textureData + bytesPerLine);
            const __m128i bl = _mm_setr_epi32(bottomData[offset0], bottomData[offset1], bottomData[offset2], bottomData[offset3]);
            const __m128i br = _mm_setr_epi32(bottomData[offset0 + 1], bottomData[offset1 + 1], bottomData[offset2 + 1], bottomData[offset3 + 1]);

            // Top 8 bits of the fraction (in the low 16-bit word of each lane), then
            // rounded down to 4 bits.
            __m128i v_distx = _mm_srli_epi16(v_fx, 8);
            __m128i v_disty = _mm_srli_epi16(v_fy, 8);
            v_distx = _mm_srli_epi16(_mm_add_epi32(v_distx, v_fxy_r), 4);
            v_disty = _mm_srli_epi16(_mm_add_epi32(v_disty, v_fxy_r), 4);
            // Duplicate the low 16-bit word of every 32-bit lane into its high word.
            v_distx = _mm_shufflehi_epi16(v_distx, _MM_SHUFFLE(2,2,0,0));
            v_distx = _mm_shufflelo_epi16(v_distx, _MM_SHUFFLE(2,2,0,0));
            v_disty = _mm_shufflehi_epi16(v_disty, _MM_SHUFFLE(2,2,0,0));
            v_disty = _mm_shufflelo_epi16(v_disty, _MM_SHUFFLE(2,2,0,0));

            interpolate_4_pixels_16_sse2(tl, tr, bl, br, v_distx, v_disty, colorMask, v_256, b);
            b += 4;
            v_fx = _mm_add_epi32(v_fx, v_fdx);
            v_fy = _mm_add_epi32(v_fy, v_fdy);
        }
        // Lane 0 holds the position of the next pixel to be produced; resume scalar processing there.
        fx = _mm_cvtsi128_si32(v_fx);
        fy = _mm_cvtsi128_si32(v_fy);
#elif defined(__ARM_NEON__)
        const int16x8_t colorMask = vdupq_n_s16(0x00ff);
        const int16x8_t invColorMask = vmvnq_s16(colorMask);
        const int16x8_t v_256 = vdupq_n_s16(256);
        // Four pixels are produced per iteration, so the vector step is 4 * fdx / 4 * fdy.
        int32x4_t v_fdx = vdupq_n_s32(fdx * 4);
        int32x4_t v_fdy = vdupq_n_s32(fdy * 4);

        const uchar *textureData = image.imageData;
        const int bytesPerLine = image.bytesPerLine;

        // Coordinates of four consecutive output pixels; only used to derive the blend
        // weights, the addresses below are computed from the scalar fx/fy.
        int32x4_t v_fx = vmovq_n_s32(fx);
        int32x4_t v_fy = vmovq_n_s32(fy);
        v_fx = vsetq_lane_s32(fx + fdx, v_fx, 1);
        v_fy = vsetq_lane_s32(fy + fdy, v_fy, 1);
        v_fx = vsetq_lane_s32(fx + fdx * 2, v_fx, 2);
        v_fy = vsetq_lane_s32(fy + fdy * 2, v_fy, 2);
        v_fx = vsetq_lane_s32(fx + fdx * 3, v_fx, 3);
        v_fy = vsetq_lane_s32(fy + fdy * 3, v_fy, 3);

        const int32x4_t v_ffff_mask = vdupq_n_s32(0x0000ffff);
        const int32x4_t v_round = vdupq_n_s32(0x0800);

        while (b < boundedEnd - 3) {
            // val[0] collects the left pixels and val[1] the right pixels of the four
            // samples; every lane is written below before the vectors are read.
            uint32x4x2_t v_top, v_bot;

            // Pixel 0: load the horizontally adjacent pair from the upper and the lower row.
            int x1 = (fx >> 16);
            int y1 = (fy >> 16);
            fx += fdx; fy += fdy;
            const uchar *sl = textureData + bytesPerLine * y1;
            const uint *s1 = reinterpret_cast<const uint *>(sl);
            const uint *s2 = reinterpret_cast<const uint *>(sl + bytesPerLine);
            v_top = vld2q_lane_u32(s1 + x1, v_top, 0);
            v_bot = vld2q_lane_u32(s2 + x1, v_bot, 0);
            // Pixel 1
            x1 = (fx >> 16);
            y1 = (fy >> 16);
            fx += fdx; fy += fdy;
            sl = textureData + bytesPerLine * y1;
            s1 = reinterpret_cast<const uint *>(sl);
            s2 = reinterpret_cast<const uint *>(sl + bytesPerLine);
            v_top = vld2q_lane_u32(s1 + x1, v_top, 1);
            v_bot = vld2q_lane_u32(s2 + x1, v_bot, 1);
            // Pixel 2
            x1 = (fx >> 16);
            y1 = (fy >> 16);
            fx += fdx; fy += fdy;
            sl = textureData + bytesPerLine * y1;
            s1 = reinterpret_cast<const uint *>(sl);
            s2 = reinterpret_cast<const uint *>(sl + bytesPerLine);
            v_top = vld2q_lane_u32(s1 + x1, v_top, 2);
            v_bot = vld2q_lane_u32(s2 + x1, v_bot, 2);
            // Pixel 3
            x1 = (fx >> 16);
            y1 = (fy >> 16);
            fx += fdx; fy += fdy;
            sl = textureData + bytesPerLine * y1;
            s1 = reinterpret_cast<const uint *>(sl);
            s2 = reinterpret_cast<const uint *>(sl + bytesPerLine);
            v_top = vld2q_lane_u32(s1 + x1, v_top, 3);
            v_bot = vld2q_lane_u32(s2 + x1, v_bot, 3);

            // 4-bit weights, rounded to nearest, copied into both 16-bit halves of each lane.
            int32x4_t v_distx = vshrq_n_s32(vaddq_s32(vandq_s32(v_fx, v_ffff_mask), v_round), 12);
            int32x4_t v_disty = vshrq_n_s32(vaddq_s32(vandq_s32(v_fy, v_ffff_mask), v_round), 12);
            v_distx = vorrq_s32(v_distx, vshlq_n_s32(v_distx, 16));
            v_disty = vorrq_s32(v_disty, vshlq_n_s32(v_disty, 16));
            // The y weight is additionally passed pre-multiplied by 16.
            int16x8_t v_disty_ = vshlq_n_s16(vreinterpretq_s16_s32(v_disty), 4);

            interpolate_4_pixels_16_neon(
                        vreinterpretq_s16_u32(v_top.val[0]), vreinterpretq_s16_u32(v_top.val[1]),
                        vreinterpretq_s16_u32(v_bot.val[0]), vreinterpretq_s16_u32(v_bot.val[1]),
                        vreinterpretq_s16_s32(v_distx), vreinterpretq_s16_s32(v_disty),
                        v_disty_, colorMask, invColorMask, v_256, b);
            b += 4;
            v_fx = vaddq_s32(v_fx, v_fdx);
            v_fy = vaddq_s32(v_fy, v_fdy);
        }
#endif
        // Remaining in-bounds pixels (all of them when no vector path was compiled in):
        // x + 1 and y + 1 can be used directly without consulting the bounds helper.
        while (b < boundedEnd) {
            int x = (fx >> 16);
            int y = (fy >> 16);

            const uint *s1 = (const uint *)image.scanLine(y);
            const uint *s2 = (const uint *)image.scanLine(y + 1);

            if (hasFastInterpolate4()) {
                int distx = (fx & 0x0000ffff) >> 8;
                int disty = (fy & 0x0000ffff) >> 8;
                *b = interpolate_4_pixels(s1 + x, s2 + x, distx, disty);
            } else {
                int distx = ((fx & 0x0000ffff) + 0x0800) >> 12;
                int disty = ((fy & 0x0000ffff) + 0x0800) >> 12;
                *b = interpolate_4_pixels_16(s1[x], s1[x + 1], s2[x], s2[x + 1], distx, disty);
            }

            fx += fdx;
            fy += fdy;
            ++b;
        }
    }

    // Generic loop with full bounds handling: everything for the tiled blend type,
    // and whatever is left after boundedEnd otherwise.
    while (b < end) {
        int x1 = (fx >> 16);
        int x2;
        int y1 = (fy >> 16);
        int y2;

        fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
        fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);

        const uint *s1 = (const uint *)image.scanLine(y1);
        const uint *s2 = (const uint *)image.scanLine(y2);

        uint tl = s1[x1];
        uint tr = s1[x2];
        uint bl = s2[x1];
        uint br = s2[x2];

        if (hasFastInterpolate4()) {
            int distx = (fx & 0x0000ffff) >> 8;
            int disty = (fy & 0x0000ffff) >> 8;
            *b = interpolate_4_pixels(tl, tr, bl, br, distx, disty);
        } else {
            int distx = ((fx & 0x0000ffff) + 0x0800) >> 12;
            int disty = ((fy & 0x0000ffff) + 0x0800) >> 12;
            *b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
        }

        fx += fdx;
        fy += fdy;
        ++b;
    }
}
3030
3031
// Dispatch table for the fixed-point bilinear helpers operating on 32-bit pixels.
// First index: 0 = BlendTransformedBilinear, 1 = BlendTransformedBilinearTiled.
// Second index: the transform type (SimpleScaleTransform, UpscaleTransform,
// DownscaleTransform, RotateTransform, FastRotateTransform).
// NOTE(review): the entry order presumably has to match the order of those enumerators,
// which are declared outside this section - confirm when adding entries.
static BilinearFastTransformHelper bilinearFastTransformHelperARGB32PM[2][NFastTransformTypes] = {
    {
        // non-tiled
        fetchTransformedBilinearARGB32PM_simple_scale_helper<BlendTransformedBilinear>,
        fetchTransformedBilinearARGB32PM_upscale_helper<BlendTransformedBilinear>,
        fetchTransformedBilinearARGB32PM_downscale_helper<BlendTransformedBilinear>,
        fetchTransformedBilinearARGB32PM_rotate_helper<BlendTransformedBilinear>,
        fetchTransformedBilinearARGB32PM_fast_rotate_helper<BlendTransformedBilinear>
    },
    {
        // tiled
        fetchTransformedBilinearARGB32PM_simple_scale_helper<BlendTransformedBilinearTiled>,
        fetchTransformedBilinearARGB32PM_upscale_helper<BlendTransformedBilinearTiled>,
        fetchTransformedBilinearARGB32PM_downscale_helper<BlendTransformedBilinearTiled>,
        fetchTransformedBilinearARGB32PM_rotate_helper<BlendTransformedBilinearTiled>,
        fetchTransformedBilinearARGB32PM_fast_rotate_helper<BlendTransformedBilinearTiled>
    }
};
3048
3049 template<TextureBlendType blendType> /* blendType = BlendTransformedBilinear or BlendTransformedBilinearTiled */
fetchTransformedBilinearARGB32PM(uint * buffer,const Operator *,const QSpanData * data,int y,int x,int length)3050 static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, const Operator *,
3051 const QSpanData *data, int y, int x,
3052 int length)
3053 {
3054 const qreal cx = x + qreal(0.5);
3055 const qreal cy = y + qreal(0.5);
3056 Q_CONSTEXPR int tiled = (blendType == BlendTransformedBilinearTiled) ? 1 : 0;
3057
3058 uint *end = buffer + length;
3059 uint *b = buffer;
3060 if (canUseFastMatrixPath(cx, cy, length, data)) {
3061 // The increment pr x in the scanline
3062 int fdx = (int)(data->m11 * fixed_scale);
3063 int fdy = (int)(data->m12 * fixed_scale);
3064
3065 int fx = int((data->m21 * cy
3066 + data->m11 * cx + data->dx) * fixed_scale);
3067 int fy = int((data->m22 * cy
3068 + data->m12 * cx + data->dy) * fixed_scale);
3069
3070 fx -= half_point;
3071 fy -= half_point;
3072
3073 if (fdy == 0) { // simple scale, no rotation or shear
3074 if (qAbs(fdx) <= fixed_scale) {
3075 // simple scale up on X
3076 bilinearFastTransformHelperARGB32PM[tiled][SimpleScaleTransform](b, end, data->texture, fx, fy, fdx, fdy);
3077 } else if (qAbs(fdx) <= 2 * fixed_scale) {
3078 // simple scale down on X, less than 2x
3079 const int mid = (length * 2 < BufferSize) ? length : ((length + 1) / 2);
3080 bilinearFastTransformHelperARGB32PM[tiled][SimpleScaleTransform](buffer, buffer + mid, data->texture, fx, fy, fdx, fdy);
3081 if (mid != length)
3082 bilinearFastTransformHelperARGB32PM[tiled][SimpleScaleTransform](buffer + mid, buffer + length, data->texture, fx, fy, fdx, fdy);
3083 } else if (qAbs(data->m22) < qreal(1./8.)) {
3084 // scale up more than 8x (on Y)
3085 bilinearFastTransformHelperARGB32PM[tiled][UpscaleTransform](b, end, data->texture, fx, fy, fdx, fdy);
3086 } else {
3087 // scale down on X
3088 bilinearFastTransformHelperARGB32PM[tiled][DownscaleTransform](b, end, data->texture, fx, fy, fdx, fdy);
3089 }
3090 } else { // rotation or shear
3091 if (qAbs(data->m11) < qreal(1./8.) || qAbs(data->m22) < qreal(1./8.) ) {
3092 // if we are zooming more than 8 times, we use 8bit precision for the position.
3093 bilinearFastTransformHelperARGB32PM[tiled][RotateTransform](b, end, data->texture, fx, fy, fdx, fdy);
3094 } else {
3095 // we are zooming less than 8x, use 4bit precision
3096 bilinearFastTransformHelperARGB32PM[tiled][FastRotateTransform](b, end, data->texture, fx, fy, fdx, fdy);
3097 }
3098 }
3099 } else {
3100 const QTextureData &image = data->texture;
3101
3102 const qreal fdx = data->m11;
3103 const qreal fdy = data->m12;
3104 const qreal fdw = data->m13;
3105
3106 qreal fx = data->m21 * cy + data->m11 * cx + data->dx;
3107 qreal fy = data->m22 * cy + data->m12 * cx + data->dy;
3108 qreal fw = data->m23 * cy + data->m13 * cx + data->m33;
3109
3110 while (b < end) {
3111 const qreal iw = fw == 0 ? 1 : 1 / fw;
3112 const qreal px = fx * iw - qreal(0.5);
3113 const qreal py = fy * iw - qreal(0.5);
3114
3115 int x1 = int(px) - (px < 0);
3116 int x2;
3117 int y1 = int(py) - (py < 0);
3118 int y2;
3119
3120 int distx = int((px - x1) * 256);
3121 int disty = int((py - y1) * 256);
3122
3123 fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
3124 fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
3125
3126 const uint *s1 = (const uint *)data->texture.scanLine(y1);
3127 const uint *s2 = (const uint *)data->texture.scanLine(y2);
3128
3129 uint tl = s1[x1];
3130 uint tr = s1[x2];
3131 uint bl = s2[x1];
3132 uint br = s2[x2];
3133
3134 *b = interpolate_4_pixels(tl, tr, bl, br, distx, disty);
3135
3136 fx += fdx;
3137 fy += fdy;
3138 fw += fdw;
3139 //force increment to avoid /0
3140 if (!fw) {
3141 fw += fdw;
3142 }
3143 ++b;
3144 }
3145 }
3146
3147 return buffer;
3148 }
3149
3150 template<TextureBlendType blendType>
fetchTransformedBilinear_simple_scale_helper(uint * b,uint * end,const QTextureData & image,int & fx,int & fy,int fdx,int)3151 static void QT_FASTCALL fetchTransformedBilinear_simple_scale_helper(uint *b, uint *end, const QTextureData &image,
3152 int &fx, int &fy, int fdx, int /*fdy*/)
3153 {
3154 const QPixelLayout *layout = &qPixelLayouts[image.format];
3155 const QVector<QRgb> *clut = image.colorTable;
3156 const FetchAndConvertPixelsFunc fetch = layout->fetchToARGB32PM;
3157
3158 int y1 = (fy >> 16);
3159 int y2;
3160 fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
3161 const uchar *s1 = image.scanLine(y1);
3162 const uchar *s2 = image.scanLine(y2);
3163
3164 const int disty = (fy & 0x0000ffff) >> 8;
3165 const int idisty = 256 - disty;
3166 const int length = end - b;
3167
3168 // The intermediate buffer is generated in the positive direction
3169 const int adjust = (fdx < 0) ? fdx * length : 0;
3170 const int offset = (fx + adjust) >> 16;
3171 int x = offset;
3172
3173 IntermediateBuffer intermediate;
3174 uint *buf1 = intermediate.buffer_rb;
3175 uint *buf2 = intermediate.buffer_ag;
3176 const uint *ptr1;
3177 const uint *ptr2;
3178
3179 int count = (qint64(length) * qAbs(fdx) + fixed_scale - 1) / fixed_scale + 2;
3180 Q_ASSERT(count <= BufferSize + 2);
3181
3182 if (blendType == BlendTransformedBilinearTiled) {
3183 x %= image.width;
3184 if (x < 0)
3185 x += image.width;
3186 int len1 = qMin(count, image.width - x);
3187 int len2 = qMin(x, count - len1);
3188
3189 ptr1 = fetch(buf1, s1, x, len1, clut, nullptr);
3190 ptr2 = fetch(buf2, s2, x, len1, clut, nullptr);
3191 for (int i = 0; i < len1; ++i) {
3192 uint t = ptr1[i];
3193 uint b = ptr2[i];
3194 buf1[i] = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff;
3195 buf2[i] = ((((t >> 8) & 0xff00ff) * idisty + ((b >> 8) & 0xff00ff) * disty) >> 8) & 0xff00ff;
3196 }
3197
3198 if (len2) {
3199 ptr1 = fetch(buf1 + len1, s1, 0, len2, clut, nullptr);
3200 ptr2 = fetch(buf2 + len1, s2, 0, len2, clut, nullptr);
3201 for (int i = 0; i < len2; ++i) {
3202 uint t = ptr1[i];
3203 uint b = ptr2[i];
3204 buf1[i + len1] = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff;
3205 buf2[i + len1] = ((((t >> 8) & 0xff00ff) * idisty + ((b >> 8) & 0xff00ff) * disty) >> 8) & 0xff00ff;
3206 }
3207 }
3208 // Generate the rest by repeatedly repeating the previous set of pixels
3209 for (int i = image.width; i < count; ++i) {
3210 buf1[i] = buf1[i - image.width];
3211 buf2[i] = buf2[i - image.width];
3212 }
3213 } else {
3214 int start = qMax(x, image.x1);
3215 int end = qMin(x + count, image.x2);
3216 int len = qMax(1, end - start);
3217 int leading = start - x;
3218
3219 ptr1 = fetch(buf1 + leading, s1, start, len, clut, nullptr);
3220 ptr2 = fetch(buf2 + leading, s2, start, len, clut, nullptr);
3221
3222 for (int i = 0; i < len; ++i) {
3223 uint t = ptr1[i];
3224 uint b = ptr2[i];
3225 buf1[i + leading] = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff;
3226 buf2[i + leading] = ((((t >> 8) & 0xff00ff) * idisty + ((b >> 8) & 0xff00ff) * disty) >> 8) & 0xff00ff;
3227 }
3228
3229 for (int i = 0; i < leading; ++i) {
3230 buf1[i] = buf1[leading];
3231 buf2[i] = buf2[leading];
3232 }
3233 for (int i = leading + len; i < count; ++i) {
3234 buf1[i] = buf1[i - 1];
3235 buf2[i] = buf2[i - 1];
3236 }
3237 }
3238
3239 // Now interpolate the values from the intermediate.buffer to get the final result.
3240 intermediate_adder(b, end, intermediate, offset, fx, fdx);
3241 }
3242
3243
3244 template<TextureBlendType blendType, QPixelLayout::BPP bpp, typename T>
fetchTransformedBilinear_fetcher(T * buf1,T * buf2,const int len,const QTextureData & image,int fx,int fy,const int fdx,const int fdy)3245 static void QT_FASTCALL fetchTransformedBilinear_fetcher(T *buf1, T *buf2, const int len, const QTextureData &image,
3246 int fx, int fy, const int fdx, const int fdy)
3247 {
3248 const QPixelLayout &layout = qPixelLayouts[image.format];
3249 constexpr bool useFetch = (bpp < QPixelLayout::BPP32);
3250 if (useFetch)
3251 Q_ASSERT(sizeof(T) == sizeof(uint));
3252 else
3253 Q_ASSERT(layout.bpp == bpp);
3254 const FetchPixelFunc fetch1 = (bpp == QPixelLayout::BPPNone) ? qFetchPixel[layout.bpp] : fetchPixel<bpp>;
3255 if (fdy == 0) {
3256 int y1 = (fy >> 16);
3257 int y2;
3258 fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
3259 const uchar *s1 = image.scanLine(y1);
3260 const uchar *s2 = image.scanLine(y2);
3261
3262 int i = 0;
3263 if (blendType == BlendTransformedBilinear) {
3264 for (; i < len; ++i) {
3265 int x1 = (fx >> 16);
3266 int x2;
3267 fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
3268 if (x1 != x2)
3269 break;
3270 if (useFetch) {
3271 buf1[i * 2 + 0] = buf1[i * 2 + 1] = fetch1(s1, x1);
3272 buf2[i * 2 + 0] = buf2[i * 2 + 1] = fetch1(s2, x1);
3273 } else {
3274 buf1[i * 2 + 0] = buf1[i * 2 + 1] = reinterpret_cast<const T *>(s1)[x1];
3275 buf2[i * 2 + 0] = buf2[i * 2 + 1] = reinterpret_cast<const T *>(s2)[x1];
3276 }
3277 fx += fdx;
3278 }
3279 int fastLen = len;
3280 if (fdx > 0)
3281 fastLen = qMin(fastLen, int((qint64(image.x2 - 1) * fixed_scale - fx) / fdx));
3282 else if (fdx < 0)
3283 fastLen = qMin(fastLen, int((qint64(image.x1) * fixed_scale - fx) / fdx));
3284
3285 for (; i < fastLen; ++i) {
3286 int x = (fx >> 16);
3287 if (useFetch) {
3288 buf1[i * 2 + 0] = fetch1(s1, x);
3289 buf1[i * 2 + 1] = fetch1(s1, x + 1);
3290 buf2[i * 2 + 0] = fetch1(s2, x);
3291 buf2[i * 2 + 1] = fetch1(s2, x + 1);
3292 } else {
3293 buf1[i * 2 + 0] = reinterpret_cast<const T *>(s1)[x];
3294 buf1[i * 2 + 1] = reinterpret_cast<const T *>(s1)[x + 1];
3295 buf2[i * 2 + 0] = reinterpret_cast<const T *>(s2)[x];
3296 buf2[i * 2 + 1] = reinterpret_cast<const T *>(s2)[x + 1];
3297 }
3298 fx += fdx;
3299 }
3300 }
3301
3302 for (; i < len; ++i) {
3303 int x1 = (fx >> 16);
3304 int x2;
3305 fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
3306 if (useFetch) {
3307 buf1[i * 2 + 0] = fetch1(s1, x1);
3308 buf1[i * 2 + 1] = fetch1(s1, x2);
3309 buf2[i * 2 + 0] = fetch1(s2, x1);
3310 buf2[i * 2 + 1] = fetch1(s2, x2);
3311 } else {
3312 buf1[i * 2 + 0] = reinterpret_cast<const T *>(s1)[x1];
3313 buf1[i * 2 + 1] = reinterpret_cast<const T *>(s1)[x2];
3314 buf2[i * 2 + 0] = reinterpret_cast<const T *>(s2)[x1];
3315 buf2[i * 2 + 1] = reinterpret_cast<const T *>(s2)[x2];
3316 }
3317 fx += fdx;
3318 }
3319 } else {
3320 int i = 0;
3321 if (blendType == BlendTransformedBilinear) {
3322 for (; i < len; ++i) {
3323 int x1 = (fx >> 16);
3324 int x2;
3325 int y1 = (fy >> 16);
3326 int y2;
3327 fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
3328 fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
3329 if (x1 != x2 && y1 != y2)
3330 break;
3331 const uchar *s1 = image.scanLine(y1);
3332 const uchar *s2 = image.scanLine(y2);
3333 if (useFetch) {
3334 buf1[i * 2 + 0] = fetch1(s1, x1);
3335 buf1[i * 2 + 1] = fetch1(s1, x2);
3336 buf2[i * 2 + 0] = fetch1(s2, x1);
3337 buf2[i * 2 + 1] = fetch1(s2, x2);
3338 } else {
3339 buf1[i * 2 + 0] = reinterpret_cast<const T *>(s1)[x1];
3340 buf1[i * 2 + 1] = reinterpret_cast<const T *>(s1)[x2];
3341 buf2[i * 2 + 0] = reinterpret_cast<const T *>(s2)[x1];
3342 buf2[i * 2 + 1] = reinterpret_cast<const T *>(s2)[x2];
3343 }
3344 fx += fdx;
3345 fy += fdy;
3346 }
3347 int fastLen = len;
3348 if (fdx > 0)
3349 fastLen = qMin(fastLen, int((qint64(image.x2 - 1) * fixed_scale - fx) / fdx));
3350 else if (fdx < 0)
3351 fastLen = qMin(fastLen, int((qint64(image.x1) * fixed_scale - fx) / fdx));
3352 if (fdy > 0)
3353 fastLen = qMin(fastLen, int((qint64(image.y2 - 1) * fixed_scale - fy) / fdy));
3354 else if (fdy < 0)
3355 fastLen = qMin(fastLen, int((qint64(image.y1) * fixed_scale - fy) / fdy));
3356
3357 for (; i < fastLen; ++i) {
3358 int x = (fx >> 16);
3359 int y = (fy >> 16);
3360 const uchar *s1 = image.scanLine(y);
3361 const uchar *s2 = s1 + image.bytesPerLine;
3362 if (useFetch) {
3363 buf1[i * 2 + 0] = fetch1(s1, x);
3364 buf1[i * 2 + 1] = fetch1(s1, x + 1);
3365 buf2[i * 2 + 0] = fetch1(s2, x);
3366 buf2[i * 2 + 1] = fetch1(s2, x + 1);
3367 } else {
3368 buf1[i * 2 + 0] = reinterpret_cast<const T *>(s1)[x];
3369 buf1[i * 2 + 1] = reinterpret_cast<const T *>(s1)[x + 1];
3370 buf2[i * 2 + 0] = reinterpret_cast<const T *>(s2)[x];
3371 buf2[i * 2 + 1] = reinterpret_cast<const T *>(s2)[x + 1];
3372 }
3373 fx += fdx;
3374 fy += fdy;
3375 }
3376 }
3377
3378 for (; i < len; ++i) {
3379 int x1 = (fx >> 16);
3380 int x2;
3381 int y1 = (fy >> 16);
3382 int y2;
3383 fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
3384 fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
3385
3386 const uchar *s1 = image.scanLine(y1);
3387 const uchar *s2 = image.scanLine(y2);
3388 if (useFetch) {
3389 buf1[i * 2 + 0] = fetch1(s1, x1);
3390 buf1[i * 2 + 1] = fetch1(s1, x2);
3391 buf2[i * 2 + 0] = fetch1(s2, x1);
3392 buf2[i * 2 + 1] = fetch1(s2, x2);
3393 } else {
3394 buf1[i * 2 + 0] = reinterpret_cast<const T *>(s1)[x1];
3395 buf1[i * 2 + 1] = reinterpret_cast<const T *>(s1)[x2];
3396 buf2[i * 2 + 0] = reinterpret_cast<const T *>(s2)[x1];
3397 buf2[i * 2 + 1] = reinterpret_cast<const T *>(s2)[x2];
3398 }
3399 fx += fdx;
3400 fy += fdy;
3401 }
3402 }
3403 }
3404
3405 // blendType = BlendTransformedBilinear or BlendTransformedBilinearTiled
3406 template<TextureBlendType blendType, QPixelLayout::BPP bpp>
fetchTransformedBilinear(uint * buffer,const Operator *,const QSpanData * data,int y,int x,int length)3407 static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Operator *,
3408 const QSpanData *data, int y, int x, int length)
3409 {
3410 const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
3411 const QVector<QRgb> *clut = data->texture.colorTable;
3412 Q_ASSERT(bpp == QPixelLayout::BPPNone || layout->bpp == bpp);
3413
3414 const qreal cx = x + qreal(0.5);
3415 const qreal cy = y + qreal(0.5);
3416
3417 if (canUseFastMatrixPath(cx, cy, length, data)) {
3418 // The increment pr x in the scanline
3419 int fdx = (int)(data->m11 * fixed_scale);
3420 int fdy = (int)(data->m12 * fixed_scale);
3421
3422 int fx = int((data->m21 * cy + data->m11 * cx + data->dx) * fixed_scale);
3423 int fy = int((data->m22 * cy + data->m12 * cx + data->dy) * fixed_scale);
3424
3425 fx -= half_point;
3426 fy -= half_point;
3427
3428 if (fdy == 0) { // simple scale, no rotation or shear
3429 if (qAbs(fdx) <= fixed_scale) { // scale up on X
3430 fetchTransformedBilinear_simple_scale_helper<blendType>(buffer, buffer + length, data->texture, fx, fy, fdx, fdy);
3431 } else if (qAbs(fdx) <= 2 * fixed_scale) { // scale down on X less than 2x
3432 const int mid = (length * 2 < BufferSize) ? length : ((length + 1) / 2);
3433 fetchTransformedBilinear_simple_scale_helper<blendType>(buffer, buffer + mid, data->texture, fx, fy, fdx, fdy);
3434 if (mid != length)
3435 fetchTransformedBilinear_simple_scale_helper<blendType>(buffer + mid, buffer + length, data->texture, fx, fy, fdx, fdy);
3436 } else {
3437 const auto fetcher = fetchTransformedBilinear_fetcher<blendType,bpp,uint>;
3438
3439 uint buf1[BufferSize];
3440 uint buf2[BufferSize];
3441 uint *b = buffer;
3442 while (length) {
3443 int len = qMin(length, BufferSize / 2);
3444 fetcher(buf1, buf2, len, data->texture, fx, fy, fdx, 0);
3445 layout->convertToARGB32PM(buf1, len * 2, clut);
3446 layout->convertToARGB32PM(buf2, len * 2, clut);
3447
3448 if (hasFastInterpolate4() || qAbs(data->m22) < qreal(1./8.)) { // scale up more than 8x (on Y)
3449 int disty = (fy & 0x0000ffff) >> 8;
3450 for (int i = 0; i < len; ++i) {
3451 int distx = (fx & 0x0000ffff) >> 8;
3452 b[i] = interpolate_4_pixels(buf1 + i * 2, buf2 + i * 2, distx, disty);
3453 fx += fdx;
3454 }
3455 } else {
3456 int disty = ((fy & 0x0000ffff) + 0x0800) >> 12;
3457 for (int i = 0; i < len; ++i) {
3458 uint tl = buf1[i * 2 + 0];
3459 uint tr = buf1[i * 2 + 1];
3460 uint bl = buf2[i * 2 + 0];
3461 uint br = buf2[i * 2 + 1];
3462 int distx = ((fx & 0x0000ffff) + 0x0800) >> 12;
3463 b[i] = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
3464 fx += fdx;
3465 }
3466 }
3467 length -= len;
3468 b += len;
3469 }
3470 }
3471 } else { // rotation or shear
3472 const auto fetcher = fetchTransformedBilinear_fetcher<blendType,bpp,uint>;
3473
3474 uint buf1[BufferSize];
3475 uint buf2[BufferSize];
3476 uint *b = buffer;
3477 while (length) {
3478 int len = qMin(length, BufferSize / 2);
3479 fetcher(buf1, buf2, len, data->texture, fx, fy, fdx, fdy);
3480 layout->convertToARGB32PM(buf1, len * 2, clut);
3481 layout->convertToARGB32PM(buf2, len * 2, clut);
3482
3483 if (hasFastInterpolate4() || qAbs(data->m11) < qreal(1./8.) || qAbs(data->m22) < qreal(1./8.)) {
3484 // If we are zooming more than 8 times, we use 8bit precision for the position.
3485 for (int i = 0; i < len; ++i) {
3486 int distx = (fx & 0x0000ffff) >> 8;
3487 int disty = (fy & 0x0000ffff) >> 8;
3488
3489 b[i] = interpolate_4_pixels(buf1 + i * 2, buf2 + i * 2, distx, disty);
3490 fx += fdx;
3491 fy += fdy;
3492 }
3493 } else {
3494 // We are zooming less than 8x, use 4bit precision
3495 for (int i = 0; i < len; ++i) {
3496 uint tl = buf1[i * 2 + 0];
3497 uint tr = buf1[i * 2 + 1];
3498 uint bl = buf2[i * 2 + 0];
3499 uint br = buf2[i * 2 + 1];
3500
3501 int distx = ((fx & 0x0000ffff) + 0x0800) >> 12;
3502 int disty = ((fy & 0x0000ffff) + 0x0800) >> 12;
3503
3504 b[i] = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
3505 fx += fdx;
3506 fy += fdy;
3507 }
3508 }
3509
3510 length -= len;
3511 b += len;
3512 }
3513 }
3514 } else {
3515 // When templated 'fetch' should be inlined at compile time:
3516 const FetchPixelFunc fetch1 = (bpp == QPixelLayout::BPPNone) ? qFetchPixel[layout->bpp] : fetchPixel<bpp>;
3517
3518 const QTextureData &image = data->texture;
3519
3520 const qreal fdx = data->m11;
3521 const qreal fdy = data->m12;
3522 const qreal fdw = data->m13;
3523
3524 qreal fx = data->m21 * cy + data->m11 * cx + data->dx;
3525 qreal fy = data->m22 * cy + data->m12 * cx + data->dy;
3526 qreal fw = data->m23 * cy + data->m13 * cx + data->m33;
3527
3528 uint buf1[BufferSize];
3529 uint buf2[BufferSize];
3530 uint *b = buffer;
3531
3532 int distxs[BufferSize / 2];
3533 int distys[BufferSize / 2];
3534
3535 while (length) {
3536 int len = qMin(length, BufferSize / 2);
3537 for (int i = 0; i < len; ++i) {
3538 const qreal iw = fw == 0 ? 1 : 1 / fw;
3539 const qreal px = fx * iw - qreal(0.5);
3540 const qreal py = fy * iw - qreal(0.5);
3541
3542 int x1 = int(px) - (px < 0);
3543 int x2;
3544 int y1 = int(py) - (py < 0);
3545 int y2;
3546
3547 distxs[i] = int((px - x1) * 256);
3548 distys[i] = int((py - y1) * 256);
3549
3550 fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
3551 fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
3552
3553 const uchar *s1 = data->texture.scanLine(y1);
3554 const uchar *s2 = data->texture.scanLine(y2);
3555 buf1[i * 2 + 0] = fetch1(s1, x1);
3556 buf1[i * 2 + 1] = fetch1(s1, x2);
3557 buf2[i * 2 + 0] = fetch1(s2, x1);
3558 buf2[i * 2 + 1] = fetch1(s2, x2);
3559
3560 fx += fdx;
3561 fy += fdy;
3562 fw += fdw;
3563 //force increment to avoid /0
3564 if (!fw)
3565 fw += fdw;
3566 }
3567
3568 layout->convertToARGB32PM(buf1, len * 2, clut);
3569 layout->convertToARGB32PM(buf2, len * 2, clut);
3570
3571 for (int i = 0; i < len; ++i) {
3572 int distx = distxs[i];
3573 int disty = distys[i];
3574
3575 b[i] = interpolate_4_pixels(buf1 + i * 2, buf2 + i * 2, distx, disty);
3576 }
3577 length -= len;
3578 b += len;
3579 }
3580 }
3581
3582 return buffer;
3583 }
3584
3585 #if QT_CONFIG(raster_64bit)
// Fetches `length` bilinearly filtered pixels of a transformed texture into
// `buffer` as QRgba64 values and returns `buffer`.
//
// The four source texels per output pixel are first gathered as 32-bit uint
// values (sbuf1 = upper row, sbuf2 = lower row, two entries per output pixel),
// converted through the pixel layout's convertToRGBA64PM(), and then blended.
//
// \a blendType selects how source coordinates are mapped into the texture by
// the fetcher / pixelBounds helpers (defined elsewhere).
// \a y, \a x are the device coordinates of the first pixel of the span; the
// sample position is the pixel centre (+0.5).
template<TextureBlendType blendType>
static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64_uint32(QRgba64 *buffer, const QSpanData *data,
                                                                    int y, int x, int length)
{
    const QTextureData &texture = data->texture;
    const QPixelLayout *layout = &qPixelLayouts[texture.format];
    const QVector<QRgb> *clut = data->texture.colorTable;

    // Sample at the centre of the destination pixel.
    const qreal cx = x + qreal(0.5);
    const qreal cy = y + qreal(0.5);

    // Raw texel pairs (upper / lower row) and their RGBA64PM conversions.
    // Each output pixel consumes two entries per row, hence chunks of BufferSize / 2.
    uint sbuf1[BufferSize];
    uint sbuf2[BufferSize];
    alignas(8) QRgba64 buf1[BufferSize];
    alignas(8) QRgba64 buf2[BufferSize];
    QRgba64 *end = buffer + length;
    QRgba64 *b = buffer;

    if (canUseFastMatrixPath(cx, cy, length, data)) {
        // Fixed-point path. fixed_scale and half_point are defined elsewhere; the
        // 0xffff masks below imply 16 fractional bits (presumably 1 << 16 and
        // 1 << 15 respectively; confirm).

        // The increment per x in the scanline
        const int fdx = (int)(data->m11 * fixed_scale);
        const int fdy = (int)(data->m12 * fixed_scale);

        int fx = int((data->m21 * cy + data->m11 * cx + data->dx) * fixed_scale);
        int fy = int((data->m22 * cy + data->m12 * cx + data->dy) * fixed_scale);

        // Shift by half a texel so that the fractional part is the blend weight
        // between neighbouring texel centres.
        fx -= half_point;
        fy -= half_point;

        // Pick the texel gatherer specialised for 32bpp layouts, otherwise the generic one.
        const auto fetcher =
            (layout->bpp == QPixelLayout::BPP32)
                ? fetchTransformedBilinear_fetcher<blendType, QPixelLayout::BPP32, uint>
                : fetchTransformedBilinear_fetcher<blendType, QPixelLayout::BPPNone, uint>;

        if (fdy == 0) { // simple scale, no rotation: fy (and thus disty) is constant for the span
            while (length) {
                int len = qMin(length, BufferSize / 2);
                int disty = (fy & 0x0000ffff);
#if defined(__SSE2__)
                // Vertical weights replicated into all eight 16-bit lanes. They are only
                // used when disty != 0, so the 16-bit truncation of 0x10000 - disty is harmless.
                const __m128i vdy = _mm_set1_epi16(disty);
                const __m128i vidy = _mm_set1_epi16(0x10000 - disty);
#endif
                fetcher(sbuf1, sbuf2, len, data->texture, fx, fy, fdx, fdy);

                layout->convertToRGBA64PM(buf1, sbuf1, len * 2, clut, nullptr);
                // The lower row has zero weight when disty == 0, so its conversion is skipped.
                // NOTE(review): the non-SSE2 branch below still passes buf2 to
                // interpolate_4_pixels_rgb64 in that case; presumably it ignores the
                // lower row when disty == 0 (confirm).
                if (disty)
                    layout->convertToRGBA64PM(buf2, sbuf2, len * 2, clut, nullptr);

                for (int i = 0; i < len; ++i) {
                    int distx = (fx & 0x0000ffff);
#if defined(__SSE2__)
                    // vt = [left, right] texels of the upper row (2 x 4 x 16-bit channels).
                    __m128i vt = _mm_loadu_si128((const __m128i*)(buf1 + i*2));
                    if (disty) {
                        // Vertical blend: upper * (1 - disty) + lower * disty, keeping the high 16 bits.
                        __m128i vb = _mm_loadu_si128((const __m128i*)(buf2 + i*2));
                        vt = _mm_mulhi_epu16(vt, vidy);
                        vb = _mm_mulhi_epu16(vb, vdy);
                        vt = _mm_add_epi16(vt, vb);
                    }
                    if (distx) {
                        // Horizontal blend: weight the left texel (low 64 bits) by 1 - distx and the
                        // right texel (high 64 bits) by distx, then fold the high half onto the low half.
                        const __m128i vdistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(distx), _MM_SHUFFLE(0, 0, 0, 0));
                        const __m128i vidistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(0x10000 - distx), _MM_SHUFFLE(0, 0, 0, 0));
                        vt = _mm_mulhi_epu16(vt, _mm_unpacklo_epi64(vidistx, vdistx));
                        vt = _mm_add_epi16(vt, _mm_srli_si128(vt, 8));
                    }
                    // The low 64 bits now hold the blended pixel.
                    _mm_storel_epi64((__m128i*)(b+i), vt);
#else
                    b[i] = interpolate_4_pixels_rgb64(buf1 + i*2, buf2 + i*2, distx, disty);
#endif
                    fx += fdx;
                }
                length -= len;
                b += len;
            }
        } else { // rotation or shear: both weights change per pixel
            while (b < end) {
                int len = qMin(length, BufferSize / 2);

                fetcher(sbuf1, sbuf2, len, data->texture, fx, fy, fdx, fdy);

                layout->convertToRGBA64PM(buf1, sbuf1, len * 2, clut, nullptr);
                layout->convertToRGBA64PM(buf2, sbuf2, len * 2, clut, nullptr);

                for (int i = 0; i < len; ++i) {
                    int distx = (fx & 0x0000ffff);
                    int disty = (fy & 0x0000ffff);
                    b[i] = interpolate_4_pixels_rgb64(buf1 + i*2, buf2 + i*2, distx, disty);
                    fx += fdx;
                    fy += fdy;
                }

                length -= len;
                b += len;
            }
        }
    } else { // fast matrix path not usable: floating-point path with a per-pixel divide by w
        const QTextureData &image = data->texture;

        const qreal fdx = data->m11;
        const qreal fdy = data->m12;
        const qreal fdw = data->m13;

        qreal fx = data->m21 * cy + data->m11 * cx + data->dx;
        qreal fy = data->m22 * cy + data->m12 * cx + data->dy;
        qreal fw = data->m23 * cy + data->m13 * cx + data->m33;

        FetchPixelFunc fetch = qFetchPixel[layout->bpp];

        // Blend weights are recorded during the gather pass and applied after conversion.
        int distxs[BufferSize / 2];
        int distys[BufferSize / 2];

        while (b < end) {
            int len = qMin(length, BufferSize / 2);
            for (int i = 0; i < len; ++i) {
                // Project to texture space; a zero w is treated as 1 to avoid dividing by zero.
                const qreal iw = fw == 0 ? 1 : 1 / fw;
                const qreal px = fx * iw - qreal(0.5);
                const qreal py = fy * iw - qreal(0.5);

                int x1 = qFloor(px);
                int x2;
                int y1 = qFloor(py);
                int y2;

                // Fractional part as a 16-bit fixed-point weight.
                distxs[i] = int((px - x1) * (1<<16));
                distys[i] = int((py - y1) * (1<<16));

                // x2 / y2 are uninitialised here and are produced by the helper; it
                // presumably also wraps or clamps x1 / y1 according to blendType (confirm).
                fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
                fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);

                const uchar *s1 = texture.scanLine(y1);
                const uchar *s2 = texture.scanLine(y2);

                sbuf1[i * 2 + 0] = fetch(s1, x1);
                sbuf1[i * 2 + 1] = fetch(s1, x2);
                sbuf2[i * 2 + 0] = fetch(s2, x1);
                sbuf2[i * 2 + 1] = fetch(s2, x2);

                fx += fdx;
                fy += fdy;
                fw += fdw;
                // force an extra increment so that w is never exactly zero
                if (!fw)
                    fw += fdw;
            }

            layout->convertToRGBA64PM(buf1, sbuf1, len * 2, clut, nullptr);
            layout->convertToRGBA64PM(buf2, sbuf2, len * 2, clut, nullptr);

            for (int i = 0; i < len; ++i) {
                int distx = distxs[i];
                int disty = distys[i];
                b[i] = interpolate_4_pixels_rgb64(buf1 + i*2, buf2 + i*2, distx, disty);
            }

            length -= len;
            b += len;
        }
    }
    return buffer;
}
3745
3746 template<TextureBlendType blendType>
fetchTransformedBilinear64_uint64(QRgba64 * buffer,const QSpanData * data,int y,int x,int length)3747 static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64_uint64(QRgba64 *buffer, const QSpanData *data,
3748 int y, int x, int length)
3749 {
3750 const QTextureData &texture = data->texture;
3751 Q_ASSERT(qPixelLayouts[texture.format].bpp == QPixelLayout::BPP64);
3752 const auto convert = (data->texture.format == QImage::Format_RGBA64) ? convertRGBA64ToRGBA64PM : convertRGBA64PMToRGBA64PM;
3753
3754 const qreal cx = x + qreal(0.5);
3755 const qreal cy = y + qreal(0.5);
3756
3757 alignas(8) QRgba64 buf1[BufferSize];
3758 alignas(8) QRgba64 buf2[BufferSize];
3759 QRgba64 *end = buffer + length;
3760 QRgba64 *b = buffer;
3761
3762 if (canUseFastMatrixPath(cx, cy, length, data)) {
3763 // The increment pr x in the scanline
3764 const int fdx = (int)(data->m11 * fixed_scale);
3765 const int fdy = (int)(data->m12 * fixed_scale);
3766
3767 int fx = int((data->m21 * cy + data->m11 * cx + data->dx) * fixed_scale);
3768 int fy = int((data->m22 * cy + data->m12 * cx + data->dy) * fixed_scale);
3769
3770 fx -= half_point;
3771 fy -= half_point;
3772 const auto fetcher = fetchTransformedBilinear_fetcher<blendType, QPixelLayout::BPP64, QRgba64>;
3773
3774 if (fdy == 0) { //simple scale, no rotation
3775 while (length) {
3776 int len = qMin(length, BufferSize / 2);
3777 int disty = (fy & 0x0000ffff);
3778 #if defined(__SSE2__)
3779 const __m128i vdy = _mm_set1_epi16(disty);
3780 const __m128i vidy = _mm_set1_epi16(0x10000 - disty);
3781 #endif
3782 fetcher(buf1, buf2, len, data->texture, fx, fy, fdx, fdy);
3783
3784 convert(buf1, len * 2);
3785 if (disty)
3786 convert(buf2, len * 2);
3787
3788 for (int i = 0; i < len; ++i) {
3789 int distx = (fx & 0x0000ffff);
3790 #if defined(__SSE2__)
3791 __m128i vt = _mm_loadu_si128((const __m128i*)(buf1 + i*2));
3792 if (disty) {
3793 __m128i vb = _mm_loadu_si128((const __m128i*)(buf2 + i*2));
3794 vt = _mm_mulhi_epu16(vt, vidy);
3795 vb = _mm_mulhi_epu16(vb, vdy);
3796 vt = _mm_add_epi16(vt, vb);
3797 }
3798 if (distx) {
3799 const __m128i vdistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(distx), _MM_SHUFFLE(0, 0, 0, 0));
3800 const __m128i vidistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(0x10000 - distx), _MM_SHUFFLE(0, 0, 0, 0));
3801 vt = _mm_mulhi_epu16(vt, _mm_unpacklo_epi64(vidistx, vdistx));
3802 vt = _mm_add_epi16(vt, _mm_srli_si128(vt, 8));
3803 }
3804 _mm_storel_epi64((__m128i*)(b+i), vt);
3805 #else
3806 b[i] = interpolate_4_pixels_rgb64(buf1 + i*2, buf2 + i*2, distx, disty);
3807 #endif
3808 fx += fdx;
3809 }
3810 length -= len;
3811 b += len;
3812 }
3813 } else { // rotation or shear
3814 while (b < end) {
3815 int len = qMin(length, BufferSize / 2);
3816
3817 fetcher(buf1, buf2, len, data->texture, fx, fy, fdx, fdy);
3818
3819 convert(buf1, len * 2);
3820 convert(buf2, len * 2);
3821
3822 for (int i = 0; i < len; ++i) {
3823 int distx = (fx & 0x0000ffff);
3824 int disty = (fy & 0x0000ffff);
3825 b[i] = interpolate_4_pixels_rgb64(buf1 + i*2, buf2 + i*2, distx, disty);
3826 fx += fdx;
3827 fy += fdy;
3828 }
3829
3830 length -= len;
3831 b += len;
3832 }
3833 }
3834 } else { // !(data->fast_matrix)
3835 const QTextureData &image = data->texture;
3836
3837 const qreal fdx = data->m11;
3838 const qreal fdy = data->m12;
3839 const qreal fdw = data->m13;
3840
3841 qreal fx = data->m21 * cy + data->m11 * cx + data->dx;
3842 qreal fy = data->m22 * cy + data->m12 * cx + data->dy;
3843 qreal fw = data->m23 * cy + data->m13 * cx + data->m33;
3844
3845 int distxs[BufferSize / 2];
3846 int distys[BufferSize / 2];
3847
3848 while (b < end) {
3849 int len = qMin(length, BufferSize / 2);
3850 for (int i = 0; i < len; ++i) {
3851 const qreal iw = fw == 0 ? 1 : 1 / fw;
3852 const qreal px = fx * iw - qreal(0.5);
3853 const qreal py = fy * iw - qreal(0.5);
3854
3855 int x1 = int(px) - (px < 0);
3856 int x2;
3857 int y1 = int(py) - (py < 0);
3858 int y2;
3859
3860 distxs[i] = int((px - x1) * (1<<16));
3861 distys[i] = int((py - y1) * (1<<16));
3862
3863 fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
3864 fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
3865
3866 const uchar *s1 = texture.scanLine(y1);
3867 const uchar *s2 = texture.scanLine(y2);
3868
3869 buf1[i * 2 + 0] = reinterpret_cast<const QRgba64 *>(s1)[x1];
3870 buf1[i * 2 + 1] = reinterpret_cast<const QRgba64 *>(s1)[x2];
3871 buf2[i * 2 + 0] = reinterpret_cast<const QRgba64 *>(s2)[x1];
3872 buf2[i * 2 + 1] = reinterpret_cast<const QRgba64 *>(s2)[x2];
3873
3874 fx += fdx;
3875 fy += fdy;
3876 fw += fdw;
3877 //force increment to avoid /0
3878 if (!fw)
3879 fw += fdw;
3880 }
3881
3882 convert(buf1, len * 2);
3883 convert(buf2, len * 2);
3884
3885 for (int i = 0; i < len; ++i) {
3886 int distx = distxs[i];
3887 int disty = distys[i];
3888 b[i] = interpolate_4_pixels_rgb64(buf1 + i*2, buf2 + i*2, distx, disty);
3889 }
3890
3891 length -= len;
3892 b += len;
3893 }
3894 }
3895 return buffer;
3896 }
3897
3898 template<TextureBlendType blendType>
fetchTransformedBilinear64(QRgba64 * buffer,const Operator *,const QSpanData * data,int y,int x,int length)3899 static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, const Operator *,
3900 const QSpanData *data, int y, int x, int length)
3901 {
3902 if (qPixelLayouts[data->texture.format].bpp == QPixelLayout::BPP64)
3903 return fetchTransformedBilinear64_uint64<blendType>(buffer, data, y, x, length);
3904 return fetchTransformedBilinear64_uint32<blendType>(buffer, data, y, x, length);
3905 }
3906 #endif
3907
// Source fetch functions for untransformed (and tiled) textures, indexed by
// QImage::Format; the entry order must match that enum (see the per-line labels).
// getSourceFetch() consults this table for BlendUntransformed and BlendTiled.
// The table is deliberately non-const: FetchUntransformed can have more
// specialized methods added depending on SIMD features.
static SourceFetchProc sourceFetchUntransformed[QImage::NImageFormats] = {
    nullptr,                    // Invalid
    fetchUntransformed,         // Mono
    fetchUntransformed,         // MonoLsb
    fetchUntransformed,         // Indexed8
    fetchUntransformedARGB32PM, // RGB32
    fetchUntransformed,         // ARGB32
    fetchUntransformedARGB32PM, // ARGB32_Premultiplied
    fetchUntransformedRGB16,    // RGB16
    fetchUntransformed,         // ARGB8565_Premultiplied
    fetchUntransformed,         // RGB666
    fetchUntransformed,         // ARGB6666_Premultiplied
    fetchUntransformed,         // RGB555
    fetchUntransformed,         // ARGB8555_Premultiplied
    fetchUntransformed,         // RGB888
    fetchUntransformed,         // RGB444
    fetchUntransformed,         // ARGB4444_Premultiplied
    fetchUntransformed,         // RGBX8888
    fetchUntransformed,         // RGBA8888
    fetchUntransformed,         // RGBA8888_Premultiplied
    fetchUntransformed,         // Format_BGR30
    fetchUntransformed,         // Format_A2BGR30_Premultiplied
    fetchUntransformed,         // Format_RGB30
    fetchUntransformed,         // Format_A2RGB30_Premultiplied
    fetchUntransformed,         // Alpha8
    fetchUntransformed,         // Grayscale8
    fetchUntransformed,         // RGBX64
    fetchUntransformed,         // RGBA64
    fetchUntransformed,         // RGBA64_Premultiplied
    fetchUntransformed,         // Grayscale16
    fetchUntransformed,         // BGR888
};
3941
// Fallback source fetch functions, indexed by TextureBlendType. getSourceFetch()
// returns an entry from here when none of its more specific tables applies.
// The transformed variants are instantiated with QPixelLayout::BPPNone.
static const SourceFetchProc sourceFetchGeneric[NBlendTypes] = {
    fetchUntransformed,                                                             // Untransformed
    fetchUntransformed,                                                             // Tiled
    fetchTransformed<BlendTransformed, QPixelLayout::BPPNone>,                      // Transformed
    fetchTransformed<BlendTransformedTiled, QPixelLayout::BPPNone>,                 // TransformedTiled
    fetchTransformedBilinear<BlendTransformedBilinear, QPixelLayout::BPPNone>,      // TransformedBilinear
    fetchTransformedBilinear<BlendTransformedBilinearTiled, QPixelLayout::BPPNone>  // TransformedBilinearTiled
};
3950
// Source fetch functions indexed by TextureBlendType, selected by getSourceFetch()
// for Format_RGB32 and Format_ARGB32_Premultiplied textures.
// The table is non-const, so entries can be replaced at runtime.
static SourceFetchProc sourceFetchARGB32PM[NBlendTypes] = {
    fetchUntransformedARGB32PM,                                     // Untransformed
    fetchUntransformedARGB32PM,                                     // Tiled
    fetchTransformed<BlendTransformed, QPixelLayout::BPP32>,        // Transformed
    fetchTransformed<BlendTransformedTiled, QPixelLayout::BPP32>,   // TransformedTiled
    fetchTransformedBilinearARGB32PM<BlendTransformedBilinear>,     // Bilinear
    fetchTransformedBilinearARGB32PM<BlendTransformedBilinearTiled> // BilinearTiled
};
3959
// Source fetch functions indexed by TextureBlendType, selected by getSourceFetch()
// for transformed textures whose pixel layout is QPixelLayout::BPP16.
// getSourceFetch() never reads the first two entries from this table: it serves
// untransformed and tiled blends from sourceFetchUntransformed instead.
static SourceFetchProc sourceFetchAny16[NBlendTypes] = {
    fetchUntransformed,                                                           // Untransformed
    fetchUntransformed,                                                           // Tiled
    fetchTransformed<BlendTransformed, QPixelLayout::BPP16>,                      // Transformed
    fetchTransformed<BlendTransformedTiled, QPixelLayout::BPP16>,                 // TransformedTiled
    fetchTransformedBilinear<BlendTransformedBilinear, QPixelLayout::BPP16>,      // TransformedBilinear
    fetchTransformedBilinear<BlendTransformedBilinearTiled, QPixelLayout::BPP16>  // TransformedBilinearTiled
};
3968
// Source fetch functions indexed by TextureBlendType, selected by getSourceFetch()
// for transformed textures whose pixel layout is QPixelLayout::BPP32 (other than
// RGB32 / ARGB32_Premultiplied, which getSourceFetch() handles first).
static SourceFetchProc sourceFetchAny32[NBlendTypes] = {
    fetchUntransformed,                                                           // Untransformed
    fetchUntransformed,                                                           // Tiled
    fetchTransformed<BlendTransformed, QPixelLayout::BPP32>,                      // Transformed
    fetchTransformed<BlendTransformedTiled, QPixelLayout::BPP32>,                 // TransformedTiled
    fetchTransformedBilinear<BlendTransformedBilinear, QPixelLayout::BPP32>,      // TransformedBilinear
    fetchTransformedBilinear<BlendTransformedBilinearTiled, QPixelLayout::BPP32>  // TransformedBilinearTiled
};
3977
getSourceFetch(TextureBlendType blendType,QImage::Format format)3978 static inline SourceFetchProc getSourceFetch(TextureBlendType blendType, QImage::Format format)
3979 {
3980 if (format == QImage::Format_RGB32 || format == QImage::Format_ARGB32_Premultiplied)
3981 return sourceFetchARGB32PM[blendType];
3982 if (blendType == BlendUntransformed || blendType == BlendTiled)
3983 return sourceFetchUntransformed[format];
3984 if (qPixelLayouts[format].bpp == QPixelLayout::BPP16)
3985 return sourceFetchAny16[blendType];
3986 if (qPixelLayouts[format].bpp == QPixelLayout::BPP32)
3987 return sourceFetchAny32[blendType];
3988 return sourceFetchGeneric[blendType];
3989 }
3990
3991 #if QT_CONFIG(raster_64bit)
// 64-bit source fetch functions indexed by TextureBlendType; getSourceFetch64()
// uses this table for every format except RGBX64 and RGBA64_Premultiplied.
static const SourceFetchProc64 sourceFetchGeneric64[NBlendTypes] = {
    fetchUntransformed64,                                     // Untransformed
    fetchUntransformed64,                                     // Tiled
    fetchTransformed64<BlendTransformed>,                     // Transformed
    fetchTransformed64<BlendTransformedTiled>,                // TransformedTiled
    fetchTransformedBilinear64<BlendTransformedBilinear>,     // Bilinear
    fetchTransformedBilinear64<BlendTransformedBilinearTiled> // BilinearTiled
};
4000
// 64-bit source fetch functions indexed by TextureBlendType; getSourceFetch64()
// uses this table for Format_RGBX64 and Format_RGBA64_Premultiplied. It differs
// from sourceFetchGeneric64 only in the untransformed / tiled entries.
static const SourceFetchProc64 sourceFetchRGBA64PM[NBlendTypes] = {
    fetchUntransformedRGBA64PM,                               // Untransformed
    fetchUntransformedRGBA64PM,                               // Tiled
    fetchTransformed64<BlendTransformed>,                     // Transformed
    fetchTransformed64<BlendTransformedTiled>,                // TransformedTiled
    fetchTransformedBilinear64<BlendTransformedBilinear>,     // Bilinear
    fetchTransformedBilinear64<BlendTransformedBilinearTiled> // BilinearTiled
};
4009
getSourceFetch64(TextureBlendType blendType,QImage::Format format)4010 static inline SourceFetchProc64 getSourceFetch64(TextureBlendType blendType, QImage::Format format)
4011 {
4012 if (format == QImage::Format_RGBX64 || format == QImage::Format_RGBA64_Premultiplied)
4013 return sourceFetchRGBA64PM[blendType];
4014 return sourceFetchGeneric64[blendType];
4015 }
4016 #endif
4017
4018
// Fixed-point format used for gradient positions: FIXPT_BITS fractional bits,
// so FIXPT_SIZE is the fixed-point representation of 1.
#define FIXPT_BITS 8
#define FIXPT_SIZE (1<<FIXPT_BITS)
4021
qt_gradient_pixel_fixed(const QGradientData * data,int fixed_pos)4022 static uint qt_gradient_pixel_fixed(const QGradientData *data, int fixed_pos)
4023 {
4024 int ipos = (fixed_pos + (FIXPT_SIZE / 2)) >> FIXPT_BITS;
4025 return data->colorTable32[qt_gradient_clamp(data, ipos)];
4026 }
4027
4028 #if QT_CONFIG(raster_64bit)
qt_gradient_pixel64_fixed(const QGradientData * data,int fixed_pos)4029 static const QRgba64& qt_gradient_pixel64_fixed(const QGradientData *data, int fixed_pos)
4030 {
4031 int ipos = (fixed_pos + (FIXPT_SIZE / 2)) >> FIXPT_BITS;
4032 return data->colorTable64[qt_gradient_clamp(data, ipos)];
4033 }
4034 #endif
4035
getLinearGradientValues(LinearGradientValues * v,const QSpanData * data)4036 static void QT_FASTCALL getLinearGradientValues(LinearGradientValues *v, const QSpanData *data)
4037 {
4038 v->dx = data->gradient.linear.end.x - data->gradient.linear.origin.x;
4039 v->dy = data->gradient.linear.end.y - data->gradient.linear.origin.y;
4040 v->l = v->dx * v->dx + v->dy * v->dy;
4041 v->off = 0;
4042 if (v->l != 0) {
4043 v->dx /= v->l;
4044 v->dy /= v->l;
4045 v->off = -v->dx * data->gradient.linear.origin.x - v->dy * data->gradient.linear.origin.y;
4046 }
4047 }
4048
4049 class GradientBase32
4050 {
4051 public:
4052 typedef uint Type;
null()4053 static Type null() { return 0; }
fetchSingle(const QGradientData & gradient,qreal v)4054 static Type fetchSingle(const QGradientData& gradient, qreal v)
4055 {
4056 return qt_gradient_pixel(&gradient, v);
4057 }
fetchSingle(const QGradientData & gradient,int v)4058 static Type fetchSingle(const QGradientData& gradient, int v)
4059 {
4060 return qt_gradient_pixel_fixed(&gradient, v);
4061 }
memfill(Type * buffer,Type fill,int length)4062 static void memfill(Type *buffer, Type fill, int length)
4063 {
4064 qt_memfill32(buffer, fill, length);
4065 }
4066 };
4067
4068 #if QT_CONFIG(raster_64bit)
4069 class GradientBase64
4070 {
4071 public:
4072 typedef QRgba64 Type;
null()4073 static Type null() { return QRgba64::fromRgba64(0); }
fetchSingle(const QGradientData & gradient,qreal v)4074 static Type fetchSingle(const QGradientData& gradient, qreal v)
4075 {
4076 return qt_gradient_pixel64(&gradient, v);
4077 }
fetchSingle(const QGradientData & gradient,int v)4078 static Type fetchSingle(const QGradientData& gradient, int v)
4079 {
4080 return qt_gradient_pixel64_fixed(&gradient, v);
4081 }
memfill(Type * buffer,Type fill,int length)4082 static void memfill(Type *buffer, Type fill, int length)
4083 {
4084 qt_memfill64((quint64*)buffer, fill, length);
4085 }
4086 };
4087 #endif
4088
4089 template<class GradientBase, typename BlendType>
qt_fetch_linear_gradient_template(BlendType * buffer,const Operator * op,const QSpanData * data,int y,int x,int length)4090 static inline const BlendType * QT_FASTCALL qt_fetch_linear_gradient_template(
4091 BlendType *buffer, const Operator *op, const QSpanData *data,
4092 int y, int x, int length)
4093 {
4094 const BlendType *b = buffer;
4095 qreal t, inc;
4096
4097 bool affine = true;
4098 qreal rx=0, ry=0;
4099 if (op->linear.l == 0) {
4100 t = inc = 0;
4101 } else {
4102 rx = data->m21 * (y + qreal(0.5)) + data->m11 * (x + qreal(0.5)) + data->dx;
4103 ry = data->m22 * (y + qreal(0.5)) + data->m12 * (x + qreal(0.5)) + data->dy;
4104 t = op->linear.dx*rx + op->linear.dy*ry + op->linear.off;
4105 inc = op->linear.dx * data->m11 + op->linear.dy * data->m12;
4106 affine = !data->m13 && !data->m23;
4107
4108 if (affine) {
4109 t *= (GRADIENT_STOPTABLE_SIZE - 1);
4110 inc *= (GRADIENT_STOPTABLE_SIZE - 1);
4111 }
4112 }
4113
4114 const BlendType *end = buffer + length;
4115 if (affine) {
4116 if (inc > qreal(-1e-5) && inc < qreal(1e-5)) {
4117 GradientBase::memfill(buffer, GradientBase::fetchSingle(data->gradient, int(t * FIXPT_SIZE)), length);
4118 } else {
4119 if (t+inc*length < qreal(INT_MAX >> (FIXPT_BITS + 1)) &&
4120 t+inc*length > qreal(INT_MIN >> (FIXPT_BITS + 1))) {
4121 // we can use fixed point math
4122 int t_fixed = int(t * FIXPT_SIZE);
4123 int inc_fixed = int(inc * FIXPT_SIZE);
4124 while (buffer < end) {
4125 *buffer = GradientBase::fetchSingle(data->gradient, t_fixed);
4126 t_fixed += inc_fixed;
4127 ++buffer;
4128 }
4129 } else {
4130 // we have to fall back to float math
4131 while (buffer < end) {
4132 *buffer = GradientBase::fetchSingle(data->gradient, t/GRADIENT_STOPTABLE_SIZE);
4133 t += inc;
4134 ++buffer;
4135 }
4136 }
4137 }
4138 } else { // fall back to float math here as well
4139 qreal rw = data->m23 * (y + qreal(0.5)) + data->m13 * (x + qreal(0.5)) + data->m33;
4140 while (buffer < end) {
4141 qreal x = rx/rw;
4142 qreal y = ry/rw;
4143 t = (op->linear.dx*x + op->linear.dy *y) + op->linear.off;
4144
4145 *buffer = GradientBase::fetchSingle(data->gradient, t);
4146 rx += data->m11;
4147 ry += data->m12;
4148 rw += data->m13;
4149 if (!rw) {
4150 rw += data->m13;
4151 }
4152 ++buffer;
4153 }
4154 }
4155
4156 return b;
4157 }
4158
qt_fetch_linear_gradient(uint * buffer,const Operator * op,const QSpanData * data,int y,int x,int length)4159 static const uint * QT_FASTCALL qt_fetch_linear_gradient(uint *buffer, const Operator *op, const QSpanData *data,
4160 int y, int x, int length)
4161 {
4162 return qt_fetch_linear_gradient_template<GradientBase32, uint>(buffer, op, data, y, x, length);
4163 }
4164
4165 #if QT_CONFIG(raster_64bit)
qt_fetch_linear_gradient_rgb64(QRgba64 * buffer,const Operator * op,const QSpanData * data,int y,int x,int length)4166 static const QRgba64 * QT_FASTCALL qt_fetch_linear_gradient_rgb64(QRgba64 *buffer, const Operator *op, const QSpanData *data,
4167 int y, int x, int length)
4168 {
4169 return qt_fetch_linear_gradient_template<GradientBase64, QRgba64>(buffer, op, data, y, x, length);
4170 }
4171 #endif
4172
getRadialGradientValues(RadialGradientValues * v,const QSpanData * data)4173 static void QT_FASTCALL getRadialGradientValues(RadialGradientValues *v, const QSpanData *data)
4174 {
4175 v->dx = data->gradient.radial.center.x - data->gradient.radial.focal.x;
4176 v->dy = data->gradient.radial.center.y - data->gradient.radial.focal.y;
4177
4178 v->dr = data->gradient.radial.center.radius - data->gradient.radial.focal.radius;
4179 v->sqrfr = data->gradient.radial.focal.radius * data->gradient.radial.focal.radius;
4180
4181 v->a = v->dr * v->dr - v->dx*v->dx - v->dy*v->dy;
4182 v->inv2a = 1 / (2 * v->a);
4183
4184 v->extended = !qFuzzyIsNull(data->gradient.radial.focal.radius) || v->a <= 0;
4185 }
4186
4187 template <class GradientBase>
4188 class RadialFetchPlain : public GradientBase
4189 {
4190 public:
4191 typedef typename GradientBase::Type BlendType;
fetch(BlendType * buffer,BlendType * end,const Operator * op,const QSpanData * data,qreal det,qreal delta_det,qreal delta_delta_det,qreal b,qreal delta_b)4192 static void fetch(BlendType *buffer, BlendType *end,
4193 const Operator *op, const QSpanData *data, qreal det,
4194 qreal delta_det, qreal delta_delta_det, qreal b, qreal delta_b)
4195 {
4196 if (op->radial.extended) {
4197 while (buffer < end) {
4198 BlendType result = GradientBase::null();
4199 if (det >= 0) {
4200 qreal w = qSqrt(det) - b;
4201 if (data->gradient.radial.focal.radius + op->radial.dr * w >= 0)
4202 result = GradientBase::fetchSingle(data->gradient, w);
4203 }
4204
4205 *buffer = result;
4206
4207 det += delta_det;
4208 delta_det += delta_delta_det;
4209 b += delta_b;
4210
4211 ++buffer;
4212 }
4213 } else {
4214 while (buffer < end) {
4215 *buffer++ = GradientBase::fetchSingle(data->gradient, qSqrt(det) - b);
4216
4217 det += delta_det;
4218 delta_det += delta_delta_det;
4219 b += delta_b;
4220 }
4221 }
4222 }
4223 };
4224
qt_fetch_radial_gradient_plain(uint * buffer,const Operator * op,const QSpanData * data,int y,int x,int length)4225 const uint * QT_FASTCALL qt_fetch_radial_gradient_plain(uint *buffer, const Operator *op, const QSpanData *data,
4226 int y, int x, int length)
4227 {
4228 return qt_fetch_radial_gradient_template<RadialFetchPlain<GradientBase32>, uint>(buffer, op, data, y, x, length);
4229 }
4230
// Radial gradient fetch function installed by getOperator(); defaults to the plain
// implementation. It is non-const, presumably so that an optimised variant can be
// assigned elsewhere; confirm.
static SourceFetchProc qt_fetch_radial_gradient = qt_fetch_radial_gradient_plain;
4232
4233 #if QT_CONFIG(raster_64bit)
qt_fetch_radial_gradient_rgb64(QRgba64 * buffer,const Operator * op,const QSpanData * data,int y,int x,int length)4234 const QRgba64 * QT_FASTCALL qt_fetch_radial_gradient_rgb64(QRgba64 *buffer, const Operator *op, const QSpanData *data,
4235 int y, int x, int length)
4236 {
4237 return qt_fetch_radial_gradient_template<RadialFetchPlain<GradientBase64>, QRgba64>(buffer, op, data, y, x, length);
4238 }
4239 #endif
4240
4241 template <class GradientBase, typename BlendType>
qt_fetch_conical_gradient_template(BlendType * buffer,const QSpanData * data,int y,int x,int length)4242 static inline const BlendType * QT_FASTCALL qt_fetch_conical_gradient_template(
4243 BlendType *buffer, const QSpanData *data,
4244 int y, int x, int length)
4245 {
4246 const BlendType *b = buffer;
4247 qreal rx = data->m21 * (y + qreal(0.5))
4248 + data->dx + data->m11 * (x + qreal(0.5));
4249 qreal ry = data->m22 * (y + qreal(0.5))
4250 + data->dy + data->m12 * (x + qreal(0.5));
4251 bool affine = !data->m13 && !data->m23;
4252
4253 const qreal inv2pi = M_1_PI / 2.0;
4254
4255 const BlendType *end = buffer + length;
4256 if (affine) {
4257 rx -= data->gradient.conical.center.x;
4258 ry -= data->gradient.conical.center.y;
4259 while (buffer < end) {
4260 qreal angle = qAtan2(ry, rx) + data->gradient.conical.angle;
4261
4262 *buffer = GradientBase::fetchSingle(data->gradient, 1 - angle * inv2pi);
4263
4264 rx += data->m11;
4265 ry += data->m12;
4266 ++buffer;
4267 }
4268 } else {
4269 qreal rw = data->m23 * (y + qreal(0.5))
4270 + data->m33 + data->m13 * (x + qreal(0.5));
4271 if (!rw)
4272 rw = 1;
4273 while (buffer < end) {
4274 qreal angle = qAtan2(ry/rw - data->gradient.conical.center.x,
4275 rx/rw - data->gradient.conical.center.y)
4276 + data->gradient.conical.angle;
4277
4278 *buffer = GradientBase::fetchSingle(data->gradient, 1 - angle * inv2pi);
4279
4280 rx += data->m11;
4281 ry += data->m12;
4282 rw += data->m13;
4283 if (!rw) {
4284 rw += data->m13;
4285 }
4286 ++buffer;
4287 }
4288 }
4289 return b;
4290 }
4291
qt_fetch_conical_gradient(uint * buffer,const Operator *,const QSpanData * data,int y,int x,int length)4292 static const uint * QT_FASTCALL qt_fetch_conical_gradient(uint *buffer, const Operator *, const QSpanData *data,
4293 int y, int x, int length)
4294 {
4295 return qt_fetch_conical_gradient_template<GradientBase32, uint>(buffer, data, y, x, length);
4296 }
4297
4298 #if QT_CONFIG(raster_64bit)
qt_fetch_conical_gradient_rgb64(QRgba64 * buffer,const Operator *,const QSpanData * data,int y,int x,int length)4299 static const QRgba64 * QT_FASTCALL qt_fetch_conical_gradient_rgb64(QRgba64 *buffer, const Operator *, const QSpanData *data,
4300 int y, int x, int length)
4301 {
4302 return qt_fetch_conical_gradient_template<GradientBase64, QRgba64>(buffer, data, y, x, length);
4303 }
4304 #endif
4305
// Composition function tables defined in another translation unit, and the
// file-local pointers through which getOperator() indexes them by composition mode.

// Solid-colour composition functions (32-bit and 64-bit).
extern CompositionFunctionSolid qt_functionForModeSolid_C[];
extern CompositionFunctionSolid64 qt_functionForModeSolid64_C[];

static const CompositionFunctionSolid *functionForModeSolid = qt_functionForModeSolid_C;
#if QT_CONFIG(raster_64bit)
static const CompositionFunctionSolid64 *functionForModeSolid64 = qt_functionForModeSolid64_C;
#endif

// Per-pixel source composition functions (32-bit and 64-bit).
extern CompositionFunction qt_functionForMode_C[];
extern CompositionFunction64 qt_functionForMode64_C[];

static const CompositionFunction *functionForMode = qt_functionForMode_C;
#if QT_CONFIG(raster_64bit)
static const CompositionFunction64 *functionForMode64 = qt_functionForMode64_C;
#endif
4321
getBlendType(const QSpanData * data)4322 static TextureBlendType getBlendType(const QSpanData *data)
4323 {
4324 TextureBlendType ft;
4325 if (data->txop <= QTransform::TxTranslate)
4326 if (data->texture.type == QTextureData::Tiled)
4327 ft = BlendTiled;
4328 else
4329 ft = BlendUntransformed;
4330 else if (data->bilinear)
4331 if (data->texture.type == QTextureData::Tiled)
4332 ft = BlendTransformedBilinearTiled;
4333 else
4334 ft = BlendTransformedBilinear;
4335 else
4336 if (data->texture.type == QTextureData::Tiled)
4337 ft = BlendTransformedTiled;
4338 else
4339 ft = BlendTransformed;
4340 return ft;
4341 }
4342
getOperator(const QSpanData * data,const QSpan * spans,int spanCount)4343 static inline Operator getOperator(const QSpanData *data, const QSpan *spans, int spanCount)
4344 {
4345 Operator op;
4346 bool solidSource = false;
4347
4348 switch(data->type) {
4349 case QSpanData::Solid:
4350 solidSource = data->solidColor.isOpaque();
4351 op.srcFetch = nullptr;
4352 #if QT_CONFIG(raster_64bit)
4353 op.srcFetch64 = nullptr;
4354 #endif
4355 break;
4356 case QSpanData::LinearGradient:
4357 solidSource = !data->gradient.alphaColor;
4358 getLinearGradientValues(&op.linear, data);
4359 op.srcFetch = qt_fetch_linear_gradient;
4360 #if QT_CONFIG(raster_64bit)
4361 op.srcFetch64 = qt_fetch_linear_gradient_rgb64;
4362 #endif
4363 break;
4364 case QSpanData::RadialGradient:
4365 solidSource = !data->gradient.alphaColor;
4366 getRadialGradientValues(&op.radial, data);
4367 op.srcFetch = qt_fetch_radial_gradient;
4368 #if QT_CONFIG(raster_64bit)
4369 op.srcFetch64 = qt_fetch_radial_gradient_rgb64;
4370 #endif
4371 break;
4372 case QSpanData::ConicalGradient:
4373 solidSource = !data->gradient.alphaColor;
4374 op.srcFetch = qt_fetch_conical_gradient;
4375 #if QT_CONFIG(raster_64bit)
4376 op.srcFetch64 = qt_fetch_conical_gradient_rgb64;
4377 #endif
4378 break;
4379 case QSpanData::Texture:
4380 solidSource = !data->texture.hasAlpha;
4381 op.srcFetch = getSourceFetch(getBlendType(data), data->texture.format);
4382 #if QT_CONFIG(raster_64bit)
4383 op.srcFetch64 = getSourceFetch64(getBlendType(data), data->texture.format);;
4384 #endif
4385 break;
4386 default:
4387 Q_UNREACHABLE();
4388 break;
4389 }
4390 #if !QT_CONFIG(raster_64bit)
4391 op.srcFetch64 = 0;
4392 #endif
4393
4394 op.mode = data->rasterBuffer->compositionMode;
4395 if (op.mode == QPainter::CompositionMode_SourceOver && solidSource)
4396 op.mode = QPainter::CompositionMode_Source;
4397
4398 op.destFetch = destFetchProc[data->rasterBuffer->format];
4399 #if QT_CONFIG(raster_64bit)
4400 op.destFetch64 = destFetchProc64[data->rasterBuffer->format];
4401 #else
4402 op.destFetch64 = 0;
4403 #endif
4404 if (op.mode == QPainter::CompositionMode_Source &&
4405 (data->type != QSpanData::Texture || data->texture.const_alpha == 256)) {
4406 const QSpan *lastSpan = spans + spanCount;
4407 bool alphaSpans = false;
4408 while (spans < lastSpan) {
4409 if (spans->coverage != 255) {
4410 alphaSpans = true;
4411 break;
4412 }
4413 ++spans;
4414 }
4415 if (!alphaSpans && spanCount > 0) {
4416 // If all spans are opaque we do not need to fetch dest.
4417 // But don't clear passthrough destFetch as they are just as fast and save destStore.
4418 if (op.destFetch != destFetchARGB32P)
4419 op.destFetch = destFetchUndefined;
4420 #if QT_CONFIG(raster_64bit)
4421 if (op.destFetch64 != destFetchRGB64)
4422 op.destFetch64 = destFetch64Undefined;
4423 #endif
4424 }
4425 }
4426
4427 op.destStore = destStoreProc[data->rasterBuffer->format];
4428 op.funcSolid = functionForModeSolid[op.mode];
4429 op.func = functionForMode[op.mode];
4430 #if QT_CONFIG(raster_64bit)
4431 op.destStore64 = destStoreProc64[data->rasterBuffer->format];
4432 op.funcSolid64 = functionForModeSolid64[op.mode];
4433 op.func64 = functionForMode64[op.mode];
4434 #else
4435 op.destStore64 = 0;
4436 op.funcSolid64 = 0;
4437 op.func64 = 0;
4438 #endif
4439
4440 return op;
4441 }
4442
spanfill_from_first(QRasterBuffer * rasterBuffer,QPixelLayout::BPP bpp,int x,int y,int length)4443 static void spanfill_from_first(QRasterBuffer *rasterBuffer, QPixelLayout::BPP bpp, int x, int y, int length)
4444 {
4445 switch (bpp) {
4446 case QPixelLayout::BPP64: {
4447 quint64 *dest = reinterpret_cast<quint64 *>(rasterBuffer->scanLine(y)) + x;
4448 qt_memfill_template(dest + 1, dest[0], length - 1);
4449 break;
4450 }
4451 case QPixelLayout::BPP32: {
4452 quint32 *dest = reinterpret_cast<quint32 *>(rasterBuffer->scanLine(y)) + x;
4453 qt_memfill_template(dest + 1, dest[0], length - 1);
4454 break;
4455 }
4456 case QPixelLayout::BPP24: {
4457 quint24 *dest = reinterpret_cast<quint24 *>(rasterBuffer->scanLine(y)) + x;
4458 qt_memfill_template(dest + 1, dest[0], length - 1);
4459 break;
4460 }
4461 case QPixelLayout::BPP16: {
4462 quint16 *dest = reinterpret_cast<quint16 *>(rasterBuffer->scanLine(y)) + x;
4463 qt_memfill_template(dest + 1, dest[0], length - 1);
4464 break;
4465 }
4466 case QPixelLayout::BPP8: {
4467 uchar *dest = rasterBuffer->scanLine(y) + x;
4468 memset(dest + 1, dest[0], length - 1);
4469 break;
4470 }
4471 default:
4472 Q_UNREACHABLE();
4473 }
4474 }
4475
4476
4477 // -------------------- blend methods ---------------------
4478
blend_color_generic(int count,const QSpan * spans,void * userData)4479 static void blend_color_generic(int count, const QSpan *spans, void *userData)
4480 {
4481 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
4482 uint buffer[BufferSize];
4483 Operator op = getOperator(data, nullptr, 0);
4484 const uint color = data->solidColor.toArgb32();
4485 const bool solidFill = op.mode == QPainter::CompositionMode_Source;
4486 const QPixelLayout::BPP bpp = qPixelLayouts[data->rasterBuffer->format].bpp;
4487
4488 while (count--) {
4489 int x = spans->x;
4490 int length = spans->len;
4491 if (solidFill && bpp >= QPixelLayout::BPP8 && spans->coverage == 255 && length) {
4492 // If dest doesn't matter we don't need to bother with blending or converting all the identical pixels
4493 op.destStore(data->rasterBuffer, x, spans->y, &color, 1);
4494 spanfill_from_first(data->rasterBuffer, bpp, x, spans->y, length);
4495 length = 0;
4496 }
4497
4498 while (length) {
4499 int l = qMin(BufferSize, length);
4500 uint *dest = op.destFetch(buffer, data->rasterBuffer, x, spans->y, l);
4501 op.funcSolid(dest, l, color, spans->coverage);
4502 if (op.destStore)
4503 op.destStore(data->rasterBuffer, x, spans->y, dest, l);
4504 length -= l;
4505 x += l;
4506 }
4507 ++spans;
4508 }
4509 }
4510
blend_color_argb(int count,const QSpan * spans,void * userData)4511 static void blend_color_argb(int count, const QSpan *spans, void *userData)
4512 {
4513 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
4514
4515 const Operator op = getOperator(data, nullptr, 0);
4516 const uint color = data->solidColor.toArgb32();
4517
4518 if (op.mode == QPainter::CompositionMode_Source) {
4519 // inline for performance
4520 while (count--) {
4521 uint *target = ((uint *)data->rasterBuffer->scanLine(spans->y)) + spans->x;
4522 if (spans->coverage == 255) {
4523 qt_memfill(target, color, spans->len);
4524 #ifdef __SSE2__
4525 } else if (spans->len > 16) {
4526 op.funcSolid(target, spans->len, color, spans->coverage);
4527 #endif
4528 } else {
4529 uint c = BYTE_MUL(color, spans->coverage);
4530 int ialpha = 255 - spans->coverage;
4531 for (int i = 0; i < spans->len; ++i)
4532 target[i] = c + BYTE_MUL(target[i], ialpha);
4533 }
4534 ++spans;
4535 }
4536 return;
4537 }
4538
4539 while (count--) {
4540 uint *target = ((uint *)data->rasterBuffer->scanLine(spans->y)) + spans->x;
4541 op.funcSolid(target, spans->len, color, spans->coverage);
4542 ++spans;
4543 }
4544 }
4545
blend_color_generic_rgb64(int count,const QSpan * spans,void * userData)4546 void blend_color_generic_rgb64(int count, const QSpan *spans, void *userData)
4547 {
4548 #if QT_CONFIG(raster_64bit)
4549 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
4550 Operator op = getOperator(data, nullptr, 0);
4551 if (!op.funcSolid64) {
4552 qCDebug(lcQtGuiDrawHelper, "blend_color_generic_rgb64: unsupported 64bit blend attempted, falling back to 32-bit");
4553 return blend_color_generic(count, spans, userData);
4554 }
4555
4556 alignas(8) QRgba64 buffer[BufferSize];
4557 const QRgba64 color = data->solidColor;
4558 const bool solidFill = op.mode == QPainter::CompositionMode_Source;
4559 const QPixelLayout::BPP bpp = qPixelLayouts[data->rasterBuffer->format].bpp;
4560
4561 while (count--) {
4562 int x = spans->x;
4563 int length = spans->len;
4564 if (solidFill && bpp >= QPixelLayout::BPP8 && spans->coverage == 255 && length && op.destStore64) {
4565 // If dest doesn't matter we don't need to bother with blending or converting all the identical pixels
4566 op.destStore64(data->rasterBuffer, x, spans->y, &color, 1);
4567 spanfill_from_first(data->rasterBuffer, bpp, x, spans->y, length);
4568 length = 0;
4569 }
4570
4571 while (length) {
4572 int l = qMin(BufferSize, length);
4573 QRgba64 *dest = op.destFetch64(buffer, data->rasterBuffer, x, spans->y, l);
4574 op.funcSolid64(dest, l, color, spans->coverage);
4575 if (op.destStore64)
4576 op.destStore64(data->rasterBuffer, x, spans->y, dest, l);
4577 length -= l;
4578 x += l;
4579 }
4580 ++spans;
4581 }
4582 #else
4583 blend_color_generic(count, spans, userData);
4584 #endif
4585 }
4586
/*
    Solid-color span painter for destinations addressed as 16-bit (ushort)
    pixels.

    CompositionMode_Source and CompositionMode_SourceOver are handled inline;
    any other composition mode is forwarded to blend_color_generic().
    Zero-length spans are skipped in the inline paths.
*/
static void blend_color_rgb16(int count, const QSpan *spans, void *userData)
{
    QSpanData *data = reinterpret_cast<QSpanData *>(userData);

    /*
        We duplicate a little logic from getOperator() and calculate the
        composition mode directly. This allows blend_color_rgb16 to be used
        from qt_gradient_quint16 with minimal overhead.
    */
    QPainter::CompositionMode mode = data->rasterBuffer->compositionMode;
    // An opaque color painted with SourceOver is treated as Source.
    if (mode == QPainter::CompositionMode_SourceOver && data->solidColor.isOpaque())
        mode = QPainter::CompositionMode_Source;

    if (mode == QPainter::CompositionMode_Source) {
        // inline for performance
        ushort c = data->solidColor.toRgb16();
        for (; count--; spans++) {
            if (!spans->len)
                continue;
            ushort *target = ((ushort *)data->rasterBuffer->scanLine(spans->y)) + spans->x;
            if (spans->coverage == 255) {
                // Full coverage: plain fill with the converted color.
                qt_memfill(target, c, spans->len);
            } else {
                // Partial coverage: mix the coverage-scaled color with the
                // remaining (255 - coverage) share of each destination pixel.
                ushort color = BYTE_MUL_RGB16(c, spans->coverage);
                int ialpha = 255 - spans->coverage;
                const ushort *end = target + spans->len;
                while (target < end) {
                    *target = color + BYTE_MUL_RGB16(*target, ialpha);
                    ++target;
                }
            }
        }
        return;
    }

    if (mode == QPainter::CompositionMode_SourceOver) {
        for (; count--; spans++) {
            if (!spans->len)
                continue;
            // Scale the 32-bit color by the span coverage first; the inverse
            // alpha is taken from the complemented alpha channel of the result.
            uint color = BYTE_MUL(data->solidColor.toArgb32(), spans->coverage);
            int ialpha = qAlpha(~color);
            ushort c = qConvertRgb32To16(color);
            ushort *target = ((ushort *)data->rasterBuffer->scanLine(spans->y)) + spans->x;
            int len = spans->len;
            // pre: the span starts on an address that is not 4-byte aligned.
            bool pre = (((quintptr)target) & 0x3) != 0;
            // post: an odd pixel remains after the aligned pairs.
            bool post = false;
            if (pre) {
                // skip to word boundary
                *target = c + BYTE_MUL_RGB16(*target, ialpha);
                ++target;
                --len;
            }
            if (len & 0x1) {
                post = true;
                --len;
            }
            // From here on two 16-bit pixels are blended per 32-bit access.
            uint *target32 = (uint*)target;
            uint c32 = c | (c<<16);
            len >>= 1;
            uint salpha = (ialpha+1) >> 3; // calculate here rather than in loop
            while (len--) {
                // blend full words
                *target32 = c32 + BYTE_MUL_RGB16_32(*target32, salpha);
                ++target32;
                // keep the 16-bit cursor in step for the trailing pixel
                target += 2;
            }
            if (post) {
                // one last pixel beyond a full word
                *target = c + BYTE_MUL_RGB16(*target, ialpha);
            }
        }
        return;
    }

    // All remaining composition modes use the generic implementation.
    blend_color_generic(count, spans, userData);
}
4663
/*
    Drives a blend handler over a list of spans.

    The handler type T must provide a BlendType typedef plus
    fetch(x, y, length), process(x, y, length, coverage, src, offset) and
    store(x, y, length), as used below.

    Spans that sit directly next to each other on the same scanline are
    merged into one run. The run is cut into chunks of at most BufferSize
    pixels; fetch() and store() are called once per chunk, while process()
    is called once per span piece inside the chunk so that every span is
    blended with its own coverage.
*/
template <typename T>
void handleSpans(int count, const QSpan *spans, const QSpanData *data, T &handler)
{
    // 256 leaves the span coverage unchanged after the ">> 8" below; texture
    // fills use the constant alpha of the texture instead.
    uint const_alpha = 256;
    if (data->type == QSpanData::Texture)
        const_alpha = data->texture.const_alpha;

    int coverage = 0;
    while (count) {
        // Empty spans contribute nothing.
        if (!spans->len) {
            ++spans;
            --count;
            continue;
        }
        int x = spans->x;
        const int y = spans->y;
        int right = x + spans->len;

        // compute length of adjacent spans
        for (int i = 1; i < count && spans[i].y == y && spans[i].x == right; ++i)
            right += spans[i].len;
        int length = right - x;

        while (length) {
            int l = qMin(BufferSize, length);
            length -= l;

            // Remember the chunk extent; x and l are consumed while the
            // individual spans of the chunk are processed.
            int process_length = l;
            int process_x = x;

            const typename T::BlendType *src = handler.fetch(process_x, y, process_length);
            int offset = 0;
            while (l > 0) {
                // The coverage is only recomputed at the start of a span, so
                // it carries over when a span continues in the next chunk.
                if (x == spans->x) // new span?
                    coverage = (spans->coverage * const_alpha) >> 8;

                // Right edge of the current span (shadows the run's right edge).
                int right = spans->x + spans->len;
                int len = qMin(l, right - x);

                handler.process(x, y, len, coverage, src, offset);

                l -= len;
                x += len;
                offset += len;

                if (x == right) { // done with current span?
                    ++spans;
                    --count;
                }
            }
            handler.store(process_x, y, process_length);
        }
    }
}
4718
4719 template<typename T>
4720 struct QBlendBase
4721 {
4722 typedef T BlendType;
QBlendBaseQBlendBase4723 QBlendBase(QSpanData *d, const Operator &o)
4724 : data(d)
4725 , op(o)
4726 , dest(nullptr)
4727 {
4728 }
4729
4730 QSpanData *data;
4731 Operator op;
4732
4733 BlendType *dest;
4734
4735 alignas(8) BlendType buffer[BufferSize];
4736 alignas(8) BlendType src_buffer[BufferSize];
4737 };
4738
4739 class BlendSrcGeneric : public QBlendBase<uint>
4740 {
4741 public:
BlendSrcGeneric(QSpanData * d,const Operator & o)4742 BlendSrcGeneric(QSpanData *d, const Operator &o)
4743 : QBlendBase<uint>(d, o)
4744 {
4745 }
4746
fetch(int x,int y,int len)4747 const uint *fetch(int x, int y, int len)
4748 {
4749 dest = op.destFetch(buffer, data->rasterBuffer, x, y, len);
4750 return op.srcFetch(src_buffer, &op, data, y, x, len);
4751 }
4752
process(int,int,int len,int coverage,const uint * src,int offset)4753 void process(int, int, int len, int coverage, const uint *src, int offset)
4754 {
4755 op.func(dest + offset, src + offset, len, coverage);
4756 }
4757
store(int x,int y,int len)4758 void store(int x, int y, int len)
4759 {
4760 if (op.destStore)
4761 op.destStore(data->rasterBuffer, x, y, dest, len);
4762 }
4763 };
4764
4765 #if QT_CONFIG(raster_64bit)
4766 class BlendSrcGenericRGB64 : public QBlendBase<QRgba64>
4767 {
4768 public:
BlendSrcGenericRGB64(QSpanData * d,const Operator & o)4769 BlendSrcGenericRGB64(QSpanData *d, const Operator &o)
4770 : QBlendBase<QRgba64>(d, o)
4771 {
4772 }
4773
isSupported() const4774 bool isSupported() const
4775 {
4776 return op.func64 && op.destFetch64;
4777 }
4778
fetch(int x,int y,int len)4779 const QRgba64 *fetch(int x, int y, int len)
4780 {
4781 dest = op.destFetch64(buffer, data->rasterBuffer, x, y, len);
4782 return op.srcFetch64(src_buffer, &op, data, y, x, len);
4783 }
4784
process(int,int,int len,int coverage,const QRgba64 * src,int offset)4785 void process(int, int, int len, int coverage, const QRgba64 *src, int offset)
4786 {
4787 op.func64(dest + offset, src + offset, len, coverage);
4788 }
4789
store(int x,int y,int len)4790 void store(int x, int y, int len)
4791 {
4792 if (op.destStore64)
4793 op.destStore64(data->rasterBuffer, x, y, dest, len);
4794 }
4795 };
4796 #endif
4797
blend_src_generic(int count,const QSpan * spans,void * userData)4798 static void blend_src_generic(int count, const QSpan *spans, void *userData)
4799 {
4800 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
4801 BlendSrcGeneric blend(data, getOperator(data, spans, count));
4802 handleSpans(count, spans, data, blend);
4803 }
4804
4805 #if QT_CONFIG(raster_64bit)
blend_src_generic_rgb64(int count,const QSpan * spans,void * userData)4806 static void blend_src_generic_rgb64(int count, const QSpan *spans, void *userData)
4807 {
4808 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
4809 Operator op = getOperator(data, spans, count);
4810 BlendSrcGenericRGB64 blend64(data, op);
4811 if (blend64.isSupported())
4812 handleSpans(count, spans, data, blend64);
4813 else {
4814 qCDebug(lcQtGuiDrawHelper, "blend_src_generic_rgb64: unsupported 64-bit blend attempted, falling back to 32-bit");
4815 BlendSrcGeneric blend32(data, op);
4816 handleSpans(count, spans, data, blend32);
4817 }
4818 }
4819 #endif
4820
blend_untransformed_generic(int count,const QSpan * spans,void * userData)4821 static void blend_untransformed_generic(int count, const QSpan *spans, void *userData)
4822 {
4823 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
4824
4825 uint buffer[BufferSize];
4826 uint src_buffer[BufferSize];
4827 Operator op = getOperator(data, spans, count);
4828
4829 const int image_width = data->texture.width;
4830 const int image_height = data->texture.height;
4831 int xoff = -qRound(-data->dx);
4832 int yoff = -qRound(-data->dy);
4833
4834 for (; count--; spans++) {
4835 if (!spans->len)
4836 continue;
4837 int x = spans->x;
4838 int length = spans->len;
4839 int sx = xoff + x;
4840 int sy = yoff + spans->y;
4841 if (sy >= 0 && sy < image_height && sx < image_width) {
4842 if (sx < 0) {
4843 x -= sx;
4844 length += sx;
4845 sx = 0;
4846 }
4847 if (sx + length > image_width)
4848 length = image_width - sx;
4849 if (length > 0) {
4850 const int coverage = (spans->coverage * data->texture.const_alpha) >> 8;
4851 while (length) {
4852 int l = qMin(BufferSize, length);
4853 const uint *src = op.srcFetch(src_buffer, &op, data, sy, sx, l);
4854 uint *dest = op.destFetch(buffer, data->rasterBuffer, x, spans->y, l);
4855 op.func(dest, src, l, coverage);
4856 if (op.destStore)
4857 op.destStore(data->rasterBuffer, x, spans->y, dest, l);
4858 x += l;
4859 sx += l;
4860 length -= l;
4861 }
4862 }
4863 }
4864 }
4865 }
4866
4867 #if QT_CONFIG(raster_64bit)
blend_untransformed_generic_rgb64(int count,const QSpan * spans,void * userData)4868 static void blend_untransformed_generic_rgb64(int count, const QSpan *spans, void *userData)
4869 {
4870 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
4871
4872 Operator op = getOperator(data, spans, count);
4873 if (!op.func64) {
4874 qCDebug(lcQtGuiDrawHelper, "blend_untransformed_generic_rgb64: unsupported 64-bit blend attempted, falling back to 32-bit");
4875 return blend_untransformed_generic(count, spans, userData);
4876 }
4877 alignas(8) QRgba64 buffer[BufferSize];
4878 alignas(8) QRgba64 src_buffer[BufferSize];
4879
4880 const int image_width = data->texture.width;
4881 const int image_height = data->texture.height;
4882 int xoff = -qRound(-data->dx);
4883 int yoff = -qRound(-data->dy);
4884
4885 for (; count--; spans++) {
4886 if (!spans->len)
4887 continue;
4888 int x = spans->x;
4889 int length = spans->len;
4890 int sx = xoff + x;
4891 int sy = yoff + spans->y;
4892 if (sy >= 0 && sy < image_height && sx < image_width) {
4893 if (sx < 0) {
4894 x -= sx;
4895 length += sx;
4896 sx = 0;
4897 }
4898 if (sx + length > image_width)
4899 length = image_width - sx;
4900 if (length > 0) {
4901 const int coverage = (spans->coverage * data->texture.const_alpha) >> 8;
4902 while (length) {
4903 int l = qMin(BufferSize, length);
4904 const QRgba64 *src = op.srcFetch64(src_buffer, &op, data, sy, sx, l);
4905 QRgba64 *dest = op.destFetch64(buffer, data->rasterBuffer, x, spans->y, l);
4906 op.func64(dest, src, l, coverage);
4907 if (op.destStore64)
4908 op.destStore64(data->rasterBuffer, x, spans->y, dest, l);
4909 x += l;
4910 sx += l;
4911 length -= l;
4912 }
4913 }
4914 }
4915 }
4916 }
4917 #endif
4918
blend_untransformed_argb(int count,const QSpan * spans,void * userData)4919 static void blend_untransformed_argb(int count, const QSpan *spans, void *userData)
4920 {
4921 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
4922 if (data->texture.format != QImage::Format_ARGB32_Premultiplied
4923 && data->texture.format != QImage::Format_RGB32) {
4924 blend_untransformed_generic(count, spans, userData);
4925 return;
4926 }
4927
4928 Operator op = getOperator(data, spans, count);
4929
4930 const int image_width = data->texture.width;
4931 const int image_height = data->texture.height;
4932 int xoff = -qRound(-data->dx);
4933 int yoff = -qRound(-data->dy);
4934
4935 for (; count--; spans++) {
4936 if (!spans->len)
4937 continue;
4938 int x = spans->x;
4939 int length = spans->len;
4940 int sx = xoff + x;
4941 int sy = yoff + spans->y;
4942 if (sy >= 0 && sy < image_height && sx < image_width) {
4943 if (sx < 0) {
4944 x -= sx;
4945 length += sx;
4946 sx = 0;
4947 }
4948 if (sx + length > image_width)
4949 length = image_width - sx;
4950 if (length > 0) {
4951 const int coverage = (spans->coverage * data->texture.const_alpha) >> 8;
4952 const uint *src = (const uint *)data->texture.scanLine(sy) + sx;
4953 uint *dest = ((uint *)data->rasterBuffer->scanLine(spans->y)) + x;
4954 op.func(dest, src, length, coverage);
4955 }
4956 }
4957 }
4958 }
4959
interpolate_pixel_rgb16_255(quint16 x,quint8 a,quint16 y,quint8 b)4960 static inline quint16 interpolate_pixel_rgb16_255(quint16 x, quint8 a,
4961 quint16 y, quint8 b)
4962 {
4963 quint16 t = ((((x & 0x07e0) * a) + ((y & 0x07e0) * b)) >> 5) & 0x07e0;
4964 t |= ((((x & 0xf81f) * a) + ((y & 0xf81f) * b)) >> 5) & 0xf81f;
4965
4966 return t;
4967 }
4968
interpolate_pixel_rgb16x2_255(quint32 x,quint8 a,quint32 y,quint8 b)4969 static inline quint32 interpolate_pixel_rgb16x2_255(quint32 x, quint8 a,
4970 quint32 y, quint8 b)
4971 {
4972 uint t;
4973 t = ((((x & 0xf81f07e0) >> 5) * a) + (((y & 0xf81f07e0) >> 5) * b)) & 0xf81f07e0;
4974 t |= ((((x & 0x07e0f81f) * a) + ((y & 0x07e0f81f) * b)) >> 5) & 0x07e0f81f;
4975 return t;
4976 }
4977
/*
    Blends length 16-bit source pixels into dest as
    dest = (src * alpha + dest * ialpha) >> 5, using the interpolate helpers.

    The callers in this file pass alpha in the range 1..0x1f with
    ialpha == 0x20 - alpha, and only call this with length > 0.
    dest and src are declared Q_DECL_RESTRICT, so they are presumably
    required not to overlap - confirm against the macro definition.
*/
static inline void blend_sourceOver_rgb16_rgb16(quint16 *Q_DECL_RESTRICT dest,
                                                const quint16 *Q_DECL_RESTRICT src,
                                                int length,
                                                const quint8 alpha,
                                                const quint8 ialpha)
{
    // Blend one pixel on its own if dest does not start on a 4-byte boundary.
    const int dstAlign = ((quintptr)dest) & 0x3;
    if (dstAlign) {
        *dest = interpolate_pixel_rgb16_255(*src, alpha, *dest, ialpha);
        ++dest;
        ++src;
        --length;
    }
    // The two-pixel path is only taken when src is 4-byte aligned as well.
    const int srcAlign = ((quintptr)src) & 0x3;
    int length32 = length >> 1;
    if (length32 && srcAlign == 0) {
        while (length32--) {
            const quint32 *src32 = reinterpret_cast<const quint32*>(src);
            quint32 *dest32 = reinterpret_cast<quint32*>(dest);
            *dest32 = interpolate_pixel_rgb16x2_255(*src32, alpha,
                                                    *dest32, ialpha);
            dest += 2;
            src += 2;
        }
        // Only a possible odd trailing pixel is left.
        length &= 0x1;
    }
    // Remaining pixels: the odd tail, or the whole rest when src was not aligned.
    while (length--) {
        *dest = interpolate_pixel_rgb16_255(*src, alpha, *dest, ialpha);
        ++dest;
        ++src;
    }
}
5010
blend_untransformed_rgb565(int count,const QSpan * spans,void * userData)5011 static void blend_untransformed_rgb565(int count, const QSpan *spans, void *userData)
5012 {
5013 QSpanData *data = reinterpret_cast<QSpanData*>(userData);
5014 QPainter::CompositionMode mode = data->rasterBuffer->compositionMode;
5015
5016 if (data->texture.format != QImage::Format_RGB16
5017 || (mode != QPainter::CompositionMode_SourceOver
5018 && mode != QPainter::CompositionMode_Source))
5019 {
5020 blend_untransformed_generic(count, spans, userData);
5021 return;
5022 }
5023
5024 const int image_width = data->texture.width;
5025 const int image_height = data->texture.height;
5026 int xoff = -qRound(-data->dx);
5027 int yoff = -qRound(-data->dy);
5028
5029 const QSpan *end = spans + count;
5030 while (spans < end) {
5031 if (!spans->len) {
5032 ++spans;
5033 continue;
5034 }
5035 const quint8 coverage = (data->texture.const_alpha * spans->coverage) >> 8;
5036 if (coverage == 0) {
5037 ++spans;
5038 continue;
5039 }
5040
5041 int x = spans->x;
5042 int length = spans->len;
5043 int sx = xoff + x;
5044 int sy = yoff + spans->y;
5045 if (sy >= 0 && sy < image_height && sx < image_width) {
5046 if (sx < 0) {
5047 x -= sx;
5048 length += sx;
5049 sx = 0;
5050 }
5051 if (sx + length > image_width)
5052 length = image_width - sx;
5053 if (length > 0) {
5054 quint16 *dest = (quint16 *)data->rasterBuffer->scanLine(spans->y) + x;
5055 const quint16 *src = (const quint16 *)data->texture.scanLine(sy) + sx;
5056 if (coverage == 255) {
5057 memcpy(dest, src, length * sizeof(quint16));
5058 } else {
5059 const quint8 alpha = (coverage + 1) >> 3;
5060 const quint8 ialpha = 0x20 - alpha;
5061 if (alpha > 0)
5062 blend_sourceOver_rgb16_rgb16(dest, src, length, alpha, ialpha);
5063 }
5064 }
5065 }
5066 ++spans;
5067 }
5068 }
5069
blend_tiled_generic(int count,const QSpan * spans,void * userData)5070 static void blend_tiled_generic(int count, const QSpan *spans, void *userData)
5071 {
5072 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
5073
5074 uint buffer[BufferSize];
5075 uint src_buffer[BufferSize];
5076 Operator op = getOperator(data, spans, count);
5077
5078 const int image_width = data->texture.width;
5079 const int image_height = data->texture.height;
5080 int xoff = -qRound(-data->dx) % image_width;
5081 int yoff = -qRound(-data->dy) % image_height;
5082
5083 if (xoff < 0)
5084 xoff += image_width;
5085 if (yoff < 0)
5086 yoff += image_height;
5087
5088 while (count--) {
5089 int x = spans->x;
5090 int length = spans->len;
5091 int sx = (xoff + spans->x) % image_width;
5092 int sy = (spans->y + yoff) % image_height;
5093 if (sx < 0)
5094 sx += image_width;
5095 if (sy < 0)
5096 sy += image_height;
5097
5098 const int coverage = (spans->coverage * data->texture.const_alpha) >> 8;
5099 while (length) {
5100 int l = qMin(image_width - sx, length);
5101 if (BufferSize < l)
5102 l = BufferSize;
5103 const uint *src = op.srcFetch(src_buffer, &op, data, sy, sx, l);
5104 uint *dest = op.destFetch(buffer, data->rasterBuffer, x, spans->y, l);
5105 op.func(dest, src, l, coverage);
5106 if (op.destStore)
5107 op.destStore(data->rasterBuffer, x, spans->y, dest, l);
5108 x += l;
5109 sx += l;
5110 length -= l;
5111 if (sx >= image_width)
5112 sx = 0;
5113 }
5114 ++spans;
5115 }
5116 }
5117
#if QT_CONFIG(raster_64bit)
/*
    64-bit (QRgba64) variant of blend_tiled_generic().

    If the operator has no 64-bit composition function, a debug message is
    logged and the 32-bit implementation is used.

    When the destination fetch is destFetch64Undefined, the texture is at
    most BufferSize pixels wide and the destination uses 32 or 64 bits per
    pixel, a fast path stores up to one tile width per span and then
    replicates the already written destination pixels. Otherwise every span
    is blended piece by piece like in the 32-bit version.
*/
static void blend_tiled_generic_rgb64(int count, const QSpan *spans, void *userData)
{
    QSpanData *data = reinterpret_cast<QSpanData *>(userData);

    Operator op = getOperator(data, spans, count);
    if (!op.func64) {
        qCDebug(lcQtGuiDrawHelper, "blend_tiled_generic_rgb64: unsupported 64-bit blend attempted, falling back to 32-bit");
        return blend_tiled_generic(count, spans, userData);
    }
    alignas(8) QRgba64 buffer[BufferSize];
    alignas(8) QRgba64 src_buffer[BufferSize];

    const int image_width = data->texture.width;
    const int image_height = data->texture.height;
    // Texture offset, normalised into [0, image size) below.
    int xoff = -qRound(-data->dx) % image_width;
    int yoff = -qRound(-data->dy) % image_height;

    if (xoff < 0)
        xoff += image_width;
    if (yoff < 0)
        yoff += image_height;

    bool isBpp32 = qPixelLayouts[data->rasterBuffer->format].bpp == QPixelLayout::BPP32;
    bool isBpp64 = qPixelLayouts[data->rasterBuffer->format].bpp == QPixelLayout::BPP64;
    if (op.destFetch64 == destFetch64Undefined && image_width <= BufferSize && (isBpp32 || isBpp64)) {
        // If destination isn't blended into the result, we can do the tiling directly on destination pixels.
        while (count--) {
            int x = spans->x;
            int y = spans->y;
            int length = spans->len;
            int sx = (xoff + spans->x) % image_width;
            int sy = (spans->y + yoff) % image_height;
            if (sx < 0)
                sx += image_width;
            if (sy < 0)
                sy += image_height;

            // sl: number of pixels taken from the texture, at most one tile width.
            int sl = qMin(image_width, length);
            if (sx > 0 && sl > 0) {
                // The span starts inside a tile: store the part up to the
                // right edge of the texture first.
                int l = qMin(image_width - sx, sl);
                const QRgba64 *src = op.srcFetch64(src_buffer, &op, data, sy, sx, l);
                op.destStore64(data->rasterBuffer, x, y, src, l);
                x += l;
                sx += l;
                sl -= l;
                if (sx >= image_width)
                    sx = 0;
            }
            if (sl > 0) {
                // Store the rest of the first tile width, starting at the
                // left edge of the texture.
                Q_ASSERT(sx == 0);
                const QRgba64 *src = op.srcFetch64(src_buffer, &op, data, sy, sx, sl);
                op.destStore64(data->rasterBuffer, x, y, src, sl);
                x += sl;
                sx += sl;
                sl -= sl;
                if (sx >= image_width)
                    sx = 0;
            }
            // x now points just past the stored pixels. For spans longer than
            // one tile width, dest is the start of the span and every further
            // pixel is copied from the pixel image_width positions before it.
            // For shorter spans the loops below do not execute.
            if (isBpp32) {
                uint *dest = reinterpret_cast<uint *>(data->rasterBuffer->scanLine(y)) + x - image_width;
                for (int i = image_width; i < length; ++i)
                    dest[i] = dest[i - image_width];
            } else {
                quint64 *dest = reinterpret_cast<quint64 *>(data->rasterBuffer->scanLine(y)) + x - image_width;
                for (int i = image_width; i < length; ++i)
                    dest[i] = dest[i - image_width];
            }
            ++spans;
        }
        return;
    }

    // Regular path: fetch, compose and store piece by piece.
    while (count--) {
        int x = spans->x;
        int length = spans->len;
        int sx = (xoff + spans->x) % image_width;
        int sy = (spans->y + yoff) % image_height;
        if (sx < 0)
            sx += image_width;
        if (sy < 0)
            sy += image_height;

        const int coverage = (spans->coverage * data->texture.const_alpha) >> 8;
        while (length) {
            // A piece ends at the right texture edge or after BufferSize pixels.
            int l = qMin(image_width - sx, length);
            if (BufferSize < l)
                l = BufferSize;
            const QRgba64 *src = op.srcFetch64(src_buffer, &op, data, sy, sx, l);
            QRgba64 *dest = op.destFetch64(buffer, data->rasterBuffer, x, spans->y, l);
            op.func64(dest, src, l, coverage);
            if (op.destStore64)
                op.destStore64(data->rasterBuffer, x, spans->y, dest, l);
            x += l;
            sx += l;
            length -= l;
            // Wrap around to the left edge of the texture.
            if (sx >= image_width)
                sx = 0;
        }
        ++spans;
    }
}
#endif
5221
blend_tiled_argb(int count,const QSpan * spans,void * userData)5222 static void blend_tiled_argb(int count, const QSpan *spans, void *userData)
5223 {
5224 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
5225 if (data->texture.format != QImage::Format_ARGB32_Premultiplied
5226 && data->texture.format != QImage::Format_RGB32) {
5227 blend_tiled_generic(count, spans, userData);
5228 return;
5229 }
5230
5231 Operator op = getOperator(data, spans, count);
5232
5233 int image_width = data->texture.width;
5234 int image_height = data->texture.height;
5235 int xoff = -qRound(-data->dx) % image_width;
5236 int yoff = -qRound(-data->dy) % image_height;
5237
5238 if (xoff < 0)
5239 xoff += image_width;
5240 if (yoff < 0)
5241 yoff += image_height;
5242
5243 while (count--) {
5244 int x = spans->x;
5245 int length = spans->len;
5246 int sx = (xoff + spans->x) % image_width;
5247 int sy = (spans->y + yoff) % image_height;
5248 if (sx < 0)
5249 sx += image_width;
5250 if (sy < 0)
5251 sy += image_height;
5252
5253 const int coverage = (spans->coverage * data->texture.const_alpha) >> 8;
5254 while (length) {
5255 int l = qMin(image_width - sx, length);
5256 if (BufferSize < l)
5257 l = BufferSize;
5258 const uint *src = (const uint *)data->texture.scanLine(sy) + sx;
5259 uint *dest = ((uint *)data->rasterBuffer->scanLine(spans->y)) + x;
5260 op.func(dest, src, l, coverage);
5261 x += l;
5262 sx += l;
5263 length -= l;
5264 if (sx >= image_width)
5265 sx = 0;
5266 }
5267 ++spans;
5268 }
5269 }
5270
/*
    Blends a tiled, untransformed Format_RGB16 texture into a destination
    that is addressed as 16-bit pixels.

    The fast path only covers Format_RGB16 textures painted with
    CompositionMode_SourceOver or CompositionMode_Source; everything else is
    passed on to blend_tiled_generic(). Spans with an effective coverage of
    255 are filled by copying, other spans are blended with
    blend_sourceOver_rgb16_rgb16() using a 5-bit alpha.
*/
static void blend_tiled_rgb565(int count, const QSpan *spans, void *userData)
{
    QSpanData *data = reinterpret_cast<QSpanData*>(userData);
    QPainter::CompositionMode mode = data->rasterBuffer->compositionMode;

    if (data->texture.format != QImage::Format_RGB16
        || (mode != QPainter::CompositionMode_SourceOver
            && mode != QPainter::CompositionMode_Source))
    {
        blend_tiled_generic(count, spans, userData);
        return;
    }

    const int image_width = data->texture.width;
    const int image_height = data->texture.height;
    // Texture offset, normalised into [0, image size) below.
    int xoff = -qRound(-data->dx) % image_width;
    int yoff = -qRound(-data->dy) % image_height;

    if (xoff < 0)
        xoff += image_width;
    if (yoff < 0)
        yoff += image_height;

    while (count--) {
        // Span coverage combined with the constant alpha of the texture.
        const quint8 coverage = (data->texture.const_alpha * spans->coverage) >> 8;
        if (coverage == 0) {
            ++spans;
            continue;
        }

        int x = spans->x;
        int length = spans->len;
        // Texture position of the first pixel of the span.
        int sx = (xoff + spans->x) % image_width;
        int sy = (spans->y + yoff) % image_height;
        if (sx < 0)
            sx += image_width;
        if (sy < 0)
            sy += image_height;

        if (coverage == 255) {
            // Copy the first texture block
            length = qMin(image_width,length);
            int tx = x;
            while (length) {
                // A piece ends at the right texture edge or after BufferSize pixels.
                int l = qMin(image_width - sx, length);
                if (BufferSize < l)
                    l = BufferSize;
                quint16 *dest = ((quint16 *)data->rasterBuffer->scanLine(spans->y)) + tx;
                const quint16 *src = (const quint16 *)data->texture.scanLine(sy) + sx;
                memcpy(dest, src, l * sizeof(quint16));
                length -= l;
                tx += l;
                sx += l;
                if (sx >= image_width)
                    sx = 0;
            }

            // Now use the rasterBuffer as the source of the texture,
            // We can now progressively copy larger blocks
            // - Less cpu time in code figuring out what to copy
            // We are dealing with one block of data
            // - More likely to fit in the cache
            // - can use memcpy
            int copy_image_width = qMin(image_width, int(spans->len));
            // length: pixels of the span that still have to be filled.
            length = spans->len - copy_image_width;
            quint16 *src = ((quint16 *)data->rasterBuffer->scanLine(spans->y)) + x;
            quint16 *dest = src + copy_image_width;
            // Each round copies everything filled so far to the position
            // right behind it, so the filled area doubles and the source and
            // destination ranges of the memcpy never overlap.
            while (copy_image_width < length) {
                memcpy(dest, src, copy_image_width * sizeof(quint16));
                dest += copy_image_width;
                length -= copy_image_width;
                copy_image_width *= 2;
            }
            // Remaining tail, no longer than the area filled so far.
            if (length > 0)
                memcpy(dest, src, length * sizeof(quint16));
        } else {
            // Reduce the 8-bit coverage to a 5-bit weight; the two weights
            // add up to 0x20. A weight of zero leaves the destination untouched.
            const quint8 alpha = (coverage + 1) >> 3;
            const quint8 ialpha = 0x20 - alpha;
            if (alpha > 0) {
                while (length) {
                    int l = qMin(image_width - sx, length);
                    if (BufferSize < l)
                        l = BufferSize;
                    quint16 *dest = ((quint16 *)data->rasterBuffer->scanLine(spans->y)) + x;
                    const quint16 *src = (const quint16 *)data->texture.scanLine(sy) + sx;
                    blend_sourceOver_rgb16_rgb16(dest, src, l, alpha, ialpha);
                    x += l;
                    sx += l;
                    length -= l;
                    // Wrap around to the left edge of the texture.
                    if (sx >= image_width)
                        sx = 0;
                }
            }
        }
        ++spans;
    }
}
5368
/* Image formats here are target formats */
/*
    Span processing functions for texture fills, one table per group of
    destination formats. qBlendTexture() picks the table from the format of
    the raster buffer and indexes it with the TextureBlendType of the fill;
    the trailing comment on each entry names the blend type it serves.
*/

// Destination is Format_ARGB32_Premultiplied.
static const ProcessSpans processTextureSpansARGB32PM[NBlendTypes] = {
    blend_untransformed_argb,           // Untransformed
    blend_tiled_argb,                   // Tiled
    blend_src_generic,                  // Transformed
    blend_src_generic,                  // TransformedTiled
    blend_src_generic,                  // TransformedBilinear
    blend_src_generic                   // TransformedBilinearTiled
};

// Destination is Format_RGB16.
static const ProcessSpans processTextureSpansRGB16[NBlendTypes] = {
    blend_untransformed_rgb565,         // Untransformed
    blend_tiled_rgb565,                 // Tiled
    blend_src_generic,                  // Transformed
    blend_src_generic,                  // TransformedTiled
    blend_src_generic,                  // TransformedBilinear
    blend_src_generic                   // TransformedBilinearTiled
};

// Default table for destination formats without a dedicated table.
static const ProcessSpans processTextureSpansGeneric[NBlendTypes] = {
    blend_untransformed_generic,        // Untransformed
    blend_tiled_generic,                // Tiled
    blend_src_generic,                  // Transformed
    blend_src_generic,                  // TransformedTiled
    blend_src_generic,                  // TransformedBilinear
    blend_src_generic                   // TransformedBilinearTiled
};

#if QT_CONFIG(raster_64bit)
// Destination formats that qBlendTexture() routes through the 64-bit blend functions.
static const ProcessSpans processTextureSpansGeneric64[NBlendTypes] = {
    blend_untransformed_generic_rgb64,  // Untransformed
    blend_tiled_generic_rgb64,          // Tiled
    blend_src_generic_rgb64,            // Transformed
    blend_src_generic_rgb64,            // TransformedTiled
    blend_src_generic_rgb64,            // TransformedBilinear
    blend_src_generic_rgb64             // TransformedBilinearTiled
};
#endif
5407
qBlendTexture(int count,const QSpan * spans,void * userData)5408 void qBlendTexture(int count, const QSpan *spans, void *userData)
5409 {
5410 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
5411 TextureBlendType blendType = getBlendType(data);
5412 ProcessSpans proc;
5413 switch (data->rasterBuffer->format) {
5414 case QImage::Format_ARGB32_Premultiplied:
5415 proc = processTextureSpansARGB32PM[blendType];
5416 break;
5417 case QImage::Format_RGB16:
5418 proc = processTextureSpansRGB16[blendType];
5419 break;
5420 #if QT_CONFIG(raster_64bit)
5421 #if defined(__SSE2__) || defined(__ARM_NEON__) || (Q_PROCESSOR_WORDSIZE == 8)
5422 case QImage::Format_ARGB32:
5423 case QImage::Format_RGBA8888:
5424 #endif
5425 case QImage::Format_BGR30:
5426 case QImage::Format_A2BGR30_Premultiplied:
5427 case QImage::Format_RGB30:
5428 case QImage::Format_A2RGB30_Premultiplied:
5429 case QImage::Format_RGBX64:
5430 case QImage::Format_RGBA64:
5431 case QImage::Format_RGBA64_Premultiplied:
5432 case QImage::Format_Grayscale16:
5433 proc = processTextureSpansGeneric64[blendType];
5434 break;
5435 #endif // QT_CONFIG(raster_64bit)
5436 case QImage::Format_Invalid:
5437 Q_UNREACHABLE();
5438 return;
5439 default:
5440 proc = processTextureSpansGeneric[blendType];
5441 break;
5442 }
5443 proc(count, spans, userData);
5444 }
5445
blend_vertical_gradient_argb(int count,const QSpan * spans,void * userData)5446 static void blend_vertical_gradient_argb(int count, const QSpan *spans, void *userData)
5447 {
5448 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
5449
5450 LinearGradientValues linear;
5451 getLinearGradientValues(&linear, data);
5452
5453 CompositionFunctionSolid funcSolid =
5454 functionForModeSolid[data->rasterBuffer->compositionMode];
5455
5456 /*
5457 The logic for vertical gradient calculations is a mathematically
5458 reduced copy of that in fetchLinearGradient() - which is basically:
5459
5460 qreal ry = data->m22 * (y + 0.5) + data->dy;
5461 qreal t = linear.dy*ry + linear.off;
5462 t *= (GRADIENT_STOPTABLE_SIZE - 1);
5463 quint32 color =
5464 qt_gradient_pixel_fixed(&data->gradient,
5465 int(t * FIXPT_SIZE));
5466
5467 This has then been converted to fixed point to improve performance.
5468 */
5469 const int gss = GRADIENT_STOPTABLE_SIZE - 1;
5470 int yinc = int((linear.dy * data->m22 * gss) * FIXPT_SIZE);
5471 int off = int((((linear.dy * (data->m22 * qreal(0.5) + data->dy) + linear.off) * gss) * FIXPT_SIZE));
5472
5473 while (count--) {
5474 int y = spans->y;
5475 int x = spans->x;
5476
5477 quint32 *dst = (quint32 *)(data->rasterBuffer->scanLine(y)) + x;
5478 quint32 color =
5479 qt_gradient_pixel_fixed(&data->gradient, yinc * y + off);
5480
5481 funcSolid(dst, spans->len, color, spans->coverage);
5482 ++spans;
5483 }
5484 }
5485
5486 template<ProcessSpans blend_color>
blend_vertical_gradient(int count,const QSpan * spans,void * userData)5487 static void blend_vertical_gradient(int count, const QSpan *spans, void *userData)
5488 {
5489 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
5490
5491 LinearGradientValues linear;
5492 getLinearGradientValues(&linear, data);
5493
5494 // Based on the same logic as blend_vertical_gradient_argb.
5495
5496 const int gss = GRADIENT_STOPTABLE_SIZE - 1;
5497 int yinc = int((linear.dy * data->m22 * gss) * FIXPT_SIZE);
5498 int off = int((((linear.dy * (data->m22 * qreal(0.5) + data->dy) + linear.off) * gss) * FIXPT_SIZE));
5499
5500 while (count--) {
5501 int y = spans->y;
5502
5503 #if QT_CONFIG(raster_64bit)
5504 data->solidColor = qt_gradient_pixel64_fixed(&data->gradient, yinc * y + off);
5505 #else
5506 data->solidColor = QRgba64::fromArgb32(qt_gradient_pixel_fixed(&data->gradient, yinc * y + off));
5507 #endif
5508 blend_color(1, spans, userData);
5509 ++spans;
5510 }
5511 }
5512
qBlendGradient(int count,const QSpan * spans,void * userData)5513 void qBlendGradient(int count, const QSpan *spans, void *userData)
5514 {
5515 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
5516 bool isVerticalGradient =
5517 data->txop <= QTransform::TxScale &&
5518 data->type == QSpanData::LinearGradient &&
5519 data->gradient.linear.end.x == data->gradient.linear.origin.x;
5520 switch (data->rasterBuffer->format) {
5521 case QImage::Format_RGB16:
5522 if (isVerticalGradient)
5523 return blend_vertical_gradient<blend_color_rgb16>(count, spans, userData);
5524 return blend_src_generic(count, spans, userData);
5525 case QImage::Format_RGB32:
5526 case QImage::Format_ARGB32_Premultiplied:
5527 if (isVerticalGradient)
5528 return blend_vertical_gradient_argb(count, spans, userData);
5529 return blend_src_generic(count, spans, userData);
5530 #if QT_CONFIG(raster_64bit)
5531 #if defined(__SSE2__) || defined(__ARM_NEON__) || (Q_PROCESSOR_WORDSIZE == 8)
5532 case QImage::Format_ARGB32:
5533 case QImage::Format_RGBA8888:
5534 #endif
5535 case QImage::Format_BGR30:
5536 case QImage::Format_A2BGR30_Premultiplied:
5537 case QImage::Format_RGB30:
5538 case QImage::Format_A2RGB30_Premultiplied:
5539 case QImage::Format_RGBX64:
5540 case QImage::Format_RGBA64:
5541 case QImage::Format_RGBA64_Premultiplied:
5542 if (isVerticalGradient)
5543 return blend_vertical_gradient<blend_color_generic_rgb64>(count, spans, userData);
5544 return blend_src_generic_rgb64(count, spans, userData);
5545 #endif // QT_CONFIG(raster_64bit)
5546 case QImage::Format_Invalid:
5547 break;
5548 default:
5549 if (isVerticalGradient)
5550 return blend_vertical_gradient<blend_color_generic>(count, spans, userData);
5551 return blend_src_generic(count, spans, userData);
5552 }
5553 Q_UNREACHABLE();
5554 }
5555
5556 template <class DST> static
qt_bitmapblit_template(QRasterBuffer * rasterBuffer,int x,int y,DST color,const uchar * map,int mapWidth,int mapHeight,int mapStride)5557 inline void qt_bitmapblit_template(QRasterBuffer *rasterBuffer,
5558 int x, int y, DST color,
5559 const uchar *map,
5560 int mapWidth, int mapHeight, int mapStride)
5561 {
5562 DST *dest = reinterpret_cast<DST *>(rasterBuffer->scanLine(y)) + x;
5563 const int destStride = rasterBuffer->stride<DST>();
5564
5565 if (mapWidth > 8) {
5566 while (mapHeight--) {
5567 int x0 = 0;
5568 int n = 0;
5569 for (int x = 0; x < mapWidth; x += 8) {
5570 uchar s = map[x >> 3];
5571 for (int i = 0; i < 8; ++i) {
5572 if (s & 0x80) {
5573 ++n;
5574 } else {
5575 if (n) {
5576 qt_memfill(dest + x0, color, n);
5577 x0 += n + 1;
5578 n = 0;
5579 } else {
5580 ++x0;
5581 }
5582 if (!s) {
5583 x0 += 8 - 1 - i;
5584 break;
5585 }
5586 }
5587 s <<= 1;
5588 }
5589 }
5590 if (n)
5591 qt_memfill(dest + x0, color, n);
5592 dest += destStride;
5593 map += mapStride;
5594 }
5595 } else {
5596 while (mapHeight--) {
5597 int x0 = 0;
5598 int n = 0;
5599 for (uchar s = *map; s; s <<= 1) {
5600 if (s & 0x80) {
5601 ++n;
5602 } else if (n) {
5603 qt_memfill(dest + x0, color, n);
5604 x0 += n + 1;
5605 n = 0;
5606 } else {
5607 ++x0;
5608 }
5609 }
5610 if (n)
5611 qt_memfill(dest + x0, color, n);
5612 dest += destStride;
5613 map += mapStride;
5614 }
5615 }
5616 }
5617
qt_bitmapblit_argb32(QRasterBuffer * rasterBuffer,int x,int y,const QRgba64 & color,const uchar * map,int mapWidth,int mapHeight,int mapStride)5618 inline static void qt_bitmapblit_argb32(QRasterBuffer *rasterBuffer,
5619 int x, int y, const QRgba64 &color,
5620 const uchar *map,
5621 int mapWidth, int mapHeight, int mapStride)
5622 {
5623 qt_bitmapblit_template<quint32>(rasterBuffer, x, y, color.toArgb32(),
5624 map, mapWidth, mapHeight, mapStride);
5625 }
5626
qt_bitmapblit_rgba8888(QRasterBuffer * rasterBuffer,int x,int y,const QRgba64 & color,const uchar * map,int mapWidth,int mapHeight,int mapStride)5627 inline static void qt_bitmapblit_rgba8888(QRasterBuffer *rasterBuffer,
5628 int x, int y, const QRgba64 &color,
5629 const uchar *map,
5630 int mapWidth, int mapHeight, int mapStride)
5631 {
5632 qt_bitmapblit_template<quint32>(rasterBuffer, x, y, ARGB2RGBA(color.toArgb32()),
5633 map, mapWidth, mapHeight, mapStride);
5634 }
5635
5636 template<QtPixelOrder PixelOrder>
qt_bitmapblit_rgb30(QRasterBuffer * rasterBuffer,int x,int y,const QRgba64 & color,const uchar * map,int mapWidth,int mapHeight,int mapStride)5637 inline static void qt_bitmapblit_rgb30(QRasterBuffer *rasterBuffer,
5638 int x, int y, const QRgba64 &color,
5639 const uchar *map,
5640 int mapWidth, int mapHeight, int mapStride)
5641 {
5642 qt_bitmapblit_template<quint32>(rasterBuffer, x, y, qConvertRgb64ToRgb30<PixelOrder>(color),
5643 map, mapWidth, mapHeight, mapStride);
5644 }
5645
qt_bitmapblit_quint16(QRasterBuffer * rasterBuffer,int x,int y,const QRgba64 & color,const uchar * map,int mapWidth,int mapHeight,int mapStride)5646 inline static void qt_bitmapblit_quint16(QRasterBuffer *rasterBuffer,
5647 int x, int y, const QRgba64 &color,
5648 const uchar *map,
5649 int mapWidth, int mapHeight, int mapStride)
5650 {
5651 qt_bitmapblit_template<quint16>(rasterBuffer, x, y, color.toRgb16(),
5652 map, mapWidth, mapHeight, mapStride);
5653 }
5654
grayBlendPixel(quint32 * dst,int coverage,QRgba64 srcLinear,const QColorTrcLut * colorProfile)5655 static inline void grayBlendPixel(quint32 *dst, int coverage, QRgba64 srcLinear, const QColorTrcLut *colorProfile)
5656 {
5657 // Do a gammacorrected gray alphablend...
5658 const QRgba64 dstLinear = colorProfile ? colorProfile->toLinear64(*dst) : QRgba64::fromArgb32(*dst);
5659
5660 QRgba64 blend = interpolate255(srcLinear, coverage, dstLinear, 255 - coverage);
5661
5662 *dst = colorProfile ? colorProfile->fromLinear64(blend) : toArgb32(blend);
5663 }
5664
alphamapblend_argb32(quint32 * dst,int coverage,QRgba64 srcLinear,quint32 src,const QColorTrcLut * colorProfile)5665 static inline void alphamapblend_argb32(quint32 *dst, int coverage, QRgba64 srcLinear, quint32 src, const QColorTrcLut *colorProfile)
5666 {
5667 if (coverage == 0) {
5668 // nothing
5669 } else if (coverage == 255 || !colorProfile) {
5670 blend_pixel(*dst, src, coverage);
5671 } else if (*dst < 0xff000000) {
5672 // Give up and do a naive gray alphablend. Needed to deal with ARGB32 and invalid ARGB32_premultiplied, see QTBUG-60571
5673 blend_pixel(*dst, src, coverage);
5674 } else if (src >= 0xff000000) {
5675 grayBlendPixel(dst, coverage, srcLinear, colorProfile);
5676 } else {
5677 // First do naive blend with text-color
5678 QRgb s = *dst;
5679 blend_pixel(s, src);
5680 // Then gamma-corrected blend with glyph shape
5681 QRgba64 s64 = colorProfile ? colorProfile->toLinear64(s) : QRgba64::fromArgb32(s);
5682 grayBlendPixel(dst, coverage, s64, colorProfile);
5683 }
5684 }
5685
5686 #if QT_CONFIG(raster_64bit)
5687
grayBlendPixel(QRgba64 & dst,int coverage,QRgba64 srcLinear,const QColorTrcLut * colorProfile)5688 static inline void grayBlendPixel(QRgba64 &dst, int coverage, QRgba64 srcLinear, const QColorTrcLut *colorProfile)
5689 {
5690 // Do a gammacorrected gray alphablend...
5691 QRgba64 dstColor = dst;
5692 if (colorProfile) {
5693 if (dstColor.isOpaque())
5694 dstColor = colorProfile->toLinear(dstColor);
5695 else if (!dstColor.isTransparent())
5696 dstColor = colorProfile->toLinear(dstColor.unpremultiplied()).premultiplied();
5697 }
5698
5699 blend_pixel(dstColor, srcLinear, coverage);
5700
5701 if (colorProfile) {
5702 if (dstColor.isOpaque())
5703 dstColor = colorProfile->fromLinear(dstColor);
5704 else if (!dstColor.isTransparent())
5705 dstColor = colorProfile->fromLinear(dstColor.unpremultiplied()).premultiplied();
5706 }
5707 dst = dstColor;
5708 }
5709
alphamapblend_generic(int coverage,QRgba64 * dest,int x,const QRgba64 & srcLinear,const QRgba64 & src,const QColorTrcLut * colorProfile)5710 static inline void alphamapblend_generic(int coverage, QRgba64 *dest, int x, const QRgba64 &srcLinear, const QRgba64 &src, const QColorTrcLut *colorProfile)
5711 {
5712 if (coverage == 0) {
5713 // nothing
5714 } else if (coverage == 255) {
5715 blend_pixel(dest[x], src);
5716 } else if (src.isOpaque()) {
5717 grayBlendPixel(dest[x], coverage, srcLinear, colorProfile);
5718 } else {
5719 // First do naive blend with text-color
5720 QRgba64 s = dest[x];
5721 blend_pixel(s, src);
5722 // Then gamma-corrected blend with glyph shape
5723 if (colorProfile)
5724 s = colorProfile->toLinear(s);
5725 grayBlendPixel(dest[x], coverage, s, colorProfile);
5726 }
5727 }
5728
qt_alphamapblit_generic(QRasterBuffer * rasterBuffer,int x,int y,const QRgba64 & color,const uchar * map,int mapWidth,int mapHeight,int mapStride,const QClipData * clip,bool useGammaCorrection)5729 static void qt_alphamapblit_generic(QRasterBuffer *rasterBuffer,
5730 int x, int y, const QRgba64 &color,
5731 const uchar *map,
5732 int mapWidth, int mapHeight, int mapStride,
5733 const QClipData *clip, bool useGammaCorrection)
5734 {
5735 if (color.isTransparent())
5736 return;
5737
5738 const QColorTrcLut *colorProfile = nullptr;
5739
5740 if (useGammaCorrection)
5741 colorProfile = QGuiApplicationPrivate::instance()->colorProfileForA8Text();
5742
5743 QRgba64 srcColor = color;
5744 if (colorProfile && color.isOpaque())
5745 srcColor = colorProfile->toLinear(srcColor);
5746
5747 alignas(8) QRgba64 buffer[BufferSize];
5748 const DestFetchProc64 destFetch64 = destFetchProc64[rasterBuffer->format];
5749 const DestStoreProc64 destStore64 = destStoreProc64[rasterBuffer->format];
5750
5751 if (!clip) {
5752 for (int ly = 0; ly < mapHeight; ++ly) {
5753 int i = x;
5754 int length = mapWidth;
5755 while (length > 0) {
5756 int l = qMin(BufferSize, length);
5757 QRgba64 *dest = destFetch64(buffer, rasterBuffer, i, y + ly, l);
5758 for (int j=0; j < l; ++j) {
5759 const int coverage = map[j + (i - x)];
5760 alphamapblend_generic(coverage, dest, j, srcColor, color, colorProfile);
5761 }
5762 if (destStore64)
5763 destStore64(rasterBuffer, i, y + ly, dest, l);
5764 length -= l;
5765 i += l;
5766 }
5767 map += mapStride;
5768 }
5769 } else {
5770 int bottom = qMin(y + mapHeight, rasterBuffer->height());
5771
5772 int top = qMax(y, 0);
5773 map += (top - y) * mapStride;
5774
5775 const_cast<QClipData *>(clip)->initialize();
5776 for (int yp = top; yp<bottom; ++yp) {
5777 const QClipData::ClipLine &line = clip->m_clipLines[yp];
5778
5779 for (int i=0; i<line.count; ++i) {
5780 const QSpan &clip = line.spans[i];
5781
5782 int start = qMax<int>(x, clip.x);
5783 int end = qMin<int>(x + mapWidth, clip.x + clip.len);
5784 if (end <= start)
5785 continue;
5786 Q_ASSERT(end - start <= BufferSize);
5787 QRgba64 *dest = destFetch64(buffer, rasterBuffer, start, clip.y, end - start);
5788
5789 for (int xp=start; xp<end; ++xp) {
5790 const int coverage = map[xp - x];
5791 alphamapblend_generic(coverage, dest, xp - start, srcColor, color, colorProfile);
5792 }
5793 if (destStore64)
5794 destStore64(rasterBuffer, start, clip.y, dest, end - start);
5795 } // for (i -> line.count)
5796 map += mapStride;
5797 } // for (yp -> bottom)
5798 }
5799 }
5800 #else
qt_alphamapblit_generic(QRasterBuffer * rasterBuffer,int x,int y,const QRgba64 & color,const uchar * map,int mapWidth,int mapHeight,int mapStride,const QClipData * clip,bool useGammaCorrection)5801 static void qt_alphamapblit_generic(QRasterBuffer *rasterBuffer,
5802 int x, int y, const QRgba64 &color,
5803 const uchar *map,
5804 int mapWidth, int mapHeight, int mapStride,
5805 const QClipData *clip, bool useGammaCorrection)
5806 {
5807 if (color.isTransparent())
5808 return;
5809
5810 const quint32 c = color.toArgb32();
5811
5812 const QColorTrcLut *colorProfile = nullptr;
5813
5814 if (useGammaCorrection)
5815 colorProfile = QGuiApplicationPrivate::instance()->colorProfileForA8Text();
5816
5817 QRgba64 srcColor = color;
5818 if (colorProfile && color.isOpaque())
5819 srcColor = colorProfile->toLinear(srcColor);
5820
5821 quint32 buffer[BufferSize];
5822 const DestFetchProc destFetch = destFetchProc[rasterBuffer->format];
5823 const DestStoreProc destStore = destStoreProc[rasterBuffer->format];
5824
5825 if (!clip) {
5826 for (int ly = 0; ly < mapHeight; ++ly) {
5827 int i = x;
5828 int length = mapWidth;
5829 while (length > 0) {
5830 int l = qMin(BufferSize, length);
5831 quint32 *dest = destFetch(buffer, rasterBuffer, i, y + ly, l);
5832 for (int j=0; j < l; ++j) {
5833 const int coverage = map[j + (i - x)];
5834 alphamapblend_argb32(dest + j, coverage, srcColor, c, colorProfile);
5835 }
5836 if (destStore)
5837 destStore(rasterBuffer, i, y + ly, dest, l);
5838 length -= l;
5839 i += l;
5840 }
5841 map += mapStride;
5842 }
5843 } else {
5844 int bottom = qMin(y + mapHeight, rasterBuffer->height());
5845
5846 int top = qMax(y, 0);
5847 map += (top - y) * mapStride;
5848
5849 const_cast<QClipData *>(clip)->initialize();
5850 for (int yp = top; yp<bottom; ++yp) {
5851 const QClipData::ClipLine &line = clip->m_clipLines[yp];
5852
5853 for (int i=0; i<line.count; ++i) {
5854 const QSpan &clip = line.spans[i];
5855
5856 int start = qMax<int>(x, clip.x);
5857 int end = qMin<int>(x + mapWidth, clip.x + clip.len);
5858 if (end <= start)
5859 continue;
5860 Q_ASSERT(end - start <= BufferSize);
5861 quint32 *dest = destFetch(buffer, rasterBuffer, start, clip.y, end - start);
5862
5863 for (int xp=start; xp<end; ++xp) {
5864 const int coverage = map[xp - x];
5865 alphamapblend_argb32(dest + xp - x, coverage, srcColor, color, colorProfile);
5866 }
5867 if (destStore)
5868 destStore(rasterBuffer, start, clip.y, dest, end - start);
5869 } // for (i -> line.count)
5870 map += mapStride;
5871 } // for (yp -> bottom)
5872 }
5873 }
5874 #endif
5875
alphamapblend_quint16(int coverage,quint16 * dest,int x,const quint16 srcColor)5876 static inline void alphamapblend_quint16(int coverage, quint16 *dest, int x, const quint16 srcColor)
5877 {
5878 if (coverage == 0) {
5879 // nothing
5880 } else if (coverage == 255) {
5881 dest[x] = srcColor;
5882 } else {
5883 dest[x] = BYTE_MUL_RGB16(srcColor, coverage)
5884 + BYTE_MUL_RGB16(dest[x], 255 - coverage);
5885 }
5886 }
5887
qt_alphamapblit_quint16(QRasterBuffer * rasterBuffer,int x,int y,const QRgba64 & color,const uchar * map,int mapWidth,int mapHeight,int mapStride,const QClipData * clip,bool useGammaCorrection)5888 void qt_alphamapblit_quint16(QRasterBuffer *rasterBuffer,
5889 int x, int y, const QRgba64 &color,
5890 const uchar *map,
5891 int mapWidth, int mapHeight, int mapStride,
5892 const QClipData *clip, bool useGammaCorrection)
5893 {
5894 if (useGammaCorrection || !color.isOpaque()) {
5895 qt_alphamapblit_generic(rasterBuffer, x, y, color, map, mapWidth, mapHeight, mapStride, clip, useGammaCorrection);
5896 return;
5897 }
5898
5899 const quint16 c = color.toRgb16();
5900
5901 if (!clip) {
5902 quint16 *dest = reinterpret_cast<quint16*>(rasterBuffer->scanLine(y)) + x;
5903 const int destStride = rasterBuffer->stride<quint16>();
5904 while (mapHeight--) {
5905 for (int i = 0; i < mapWidth; ++i)
5906 alphamapblend_quint16(map[i], dest, i, c);
5907 dest += destStride;
5908 map += mapStride;
5909 }
5910 } else {
5911 int top = qMax(y, 0);
5912 int bottom = qMin(y + mapHeight, rasterBuffer->height());
5913 map += (top - y) * mapStride;
5914
5915 const_cast<QClipData *>(clip)->initialize();
5916 for (int yp = top; yp<bottom; ++yp) {
5917 const QClipData::ClipLine &line = clip->m_clipLines[yp];
5918
5919 quint16 *dest = reinterpret_cast<quint16*>(rasterBuffer->scanLine(yp));
5920
5921 for (int i=0; i<line.count; ++i) {
5922 const QSpan &clip = line.spans[i];
5923
5924 int start = qMax<int>(x, clip.x);
5925 int end = qMin<int>(x + mapWidth, clip.x + clip.len);
5926
5927 for (int xp=start; xp<end; ++xp)
5928 alphamapblend_quint16(map[xp - x], dest, xp, c);
5929 } // for (i -> line.count)
5930 map += mapStride;
5931 } // for (yp -> bottom)
5932 }
5933 }
5934
qt_alphamapblit_argb32(QRasterBuffer * rasterBuffer,int x,int y,const QRgba64 & color,const uchar * map,int mapWidth,int mapHeight,int mapStride,const QClipData * clip,bool useGammaCorrection)5935 static void qt_alphamapblit_argb32(QRasterBuffer *rasterBuffer,
5936 int x, int y, const QRgba64 &color,
5937 const uchar *map,
5938 int mapWidth, int mapHeight, int mapStride,
5939 const QClipData *clip, bool useGammaCorrection)
5940 {
5941 const quint32 c = color.toArgb32();
5942 const int destStride = rasterBuffer->stride<quint32>();
5943
5944 if (color.isTransparent())
5945 return;
5946
5947 const QColorTrcLut *colorProfile = nullptr;
5948
5949 if (useGammaCorrection)
5950 colorProfile = QGuiApplicationPrivate::instance()->colorProfileForA8Text();
5951
5952 QRgba64 srcColor = color;
5953 if (colorProfile && color.isOpaque())
5954 srcColor = colorProfile->toLinear(srcColor);
5955
5956 if (!clip) {
5957 quint32 *dest = reinterpret_cast<quint32*>(rasterBuffer->scanLine(y)) + x;
5958 while (mapHeight--) {
5959 for (int i = 0; i < mapWidth; ++i) {
5960 const int coverage = map[i];
5961 alphamapblend_argb32(dest + i, coverage, srcColor, c, colorProfile);
5962 }
5963 dest += destStride;
5964 map += mapStride;
5965 }
5966 } else {
5967 int bottom = qMin(y + mapHeight, rasterBuffer->height());
5968
5969 int top = qMax(y, 0);
5970 map += (top - y) * mapStride;
5971
5972 const_cast<QClipData *>(clip)->initialize();
5973 for (int yp = top; yp<bottom; ++yp) {
5974 const QClipData::ClipLine &line = clip->m_clipLines[yp];
5975
5976 quint32 *dest = reinterpret_cast<quint32 *>(rasterBuffer->scanLine(yp));
5977
5978 for (int i=0; i<line.count; ++i) {
5979 const QSpan &clip = line.spans[i];
5980
5981 int start = qMax<int>(x, clip.x);
5982 int end = qMin<int>(x + mapWidth, clip.x + clip.len);
5983
5984 for (int xp=start; xp<end; ++xp) {
5985 const int coverage = map[xp - x];
5986 alphamapblend_argb32(dest + xp, coverage, srcColor, c, colorProfile);
5987 } // for (i -> line.count)
5988 } // for (yp -> bottom)
5989 map += mapStride;
5990 }
5991 }
5992 }
5993
qRgbAvg(QRgb rgb)5994 static inline int qRgbAvg(QRgb rgb)
5995 {
5996 return (qRed(rgb) * 5 + qGreen(rgb) * 6 + qBlue(rgb) * 5) / 16;
5997 }
5998
rgbBlendPixel(quint32 * dst,int coverage,QRgba64 slinear,const QColorTrcLut * colorProfile)5999 static inline void rgbBlendPixel(quint32 *dst, int coverage, QRgba64 slinear, const QColorTrcLut *colorProfile)
6000 {
6001 // Do a gammacorrected RGB alphablend...
6002 const QRgba64 dlinear = colorProfile ? colorProfile->toLinear64(*dst) : QRgba64::fromArgb32(*dst);
6003
6004 QRgba64 blend = rgbBlend(dlinear, slinear, coverage);
6005
6006 *dst = colorProfile ? colorProfile->fromLinear64(blend) : toArgb32(blend);
6007 }
6008
rgbBlend(QRgb d,QRgb s,uint rgbAlpha)6009 static inline QRgb rgbBlend(QRgb d, QRgb s, uint rgbAlpha)
6010 {
6011 #if defined(__SSE2__)
6012 __m128i vd = _mm_cvtsi32_si128(d);
6013 __m128i vs = _mm_cvtsi32_si128(s);
6014 __m128i va = _mm_cvtsi32_si128(rgbAlpha);
6015 const __m128i vz = _mm_setzero_si128();
6016 vd = _mm_unpacklo_epi8(vd, vz);
6017 vs = _mm_unpacklo_epi8(vs, vz);
6018 va = _mm_unpacklo_epi8(va, vz);
6019 __m128i vb = _mm_xor_si128(_mm_set1_epi16(255), va);
6020 vs = _mm_mullo_epi16(vs, va);
6021 vd = _mm_mullo_epi16(vd, vb);
6022 vd = _mm_add_epi16(vd, vs);
6023 vd = _mm_add_epi16(vd, _mm_srli_epi16(vd, 8));
6024 vd = _mm_add_epi16(vd, _mm_set1_epi16(0x80));
6025 vd = _mm_srli_epi16(vd, 8);
6026 vd = _mm_packus_epi16(vd, vd);
6027 return _mm_cvtsi128_si32(vd);
6028 #else
6029 const int dr = qRed(d);
6030 const int dg = qGreen(d);
6031 const int db = qBlue(d);
6032
6033 const int sr = qRed(s);
6034 const int sg = qGreen(s);
6035 const int sb = qBlue(s);
6036
6037 const int mr = qRed(rgbAlpha);
6038 const int mg = qGreen(rgbAlpha);
6039 const int mb = qBlue(rgbAlpha);
6040
6041 const int nr = qt_div_255(sr * mr + dr * (255 - mr));
6042 const int ng = qt_div_255(sg * mg + dg * (255 - mg));
6043 const int nb = qt_div_255(sb * mb + db * (255 - mb));
6044
6045 return 0xff000000 | (nr << 16) | (ng << 8) | nb;
6046 #endif
6047 }
6048
alphargbblend_argb32(quint32 * dst,uint coverage,const QRgba64 & srcLinear,quint32 src,const QColorTrcLut * colorProfile)6049 static inline void alphargbblend_argb32(quint32 *dst, uint coverage, const QRgba64 &srcLinear, quint32 src, const QColorTrcLut *colorProfile)
6050 {
6051 if (coverage == 0xff000000) {
6052 // nothing
6053 } else if (coverage == 0xffffffff && qAlpha(src) == 255) {
6054 blend_pixel(*dst, src);
6055 } else if (*dst < 0xff000000) {
6056 // Give up and do a naive gray alphablend. Needed to deal with ARGB32 and invalid ARGB32_premultiplied, see QTBUG-60571
6057 blend_pixel(*dst, src, qRgbAvg(coverage));
6058 } else if (!colorProfile) {
6059 // First do naive blend with text-color
6060 QRgb s = *dst;
6061 blend_pixel(s, src);
6062 // Then a naive blend with glyph shape
6063 *dst = rgbBlend(*dst, s, coverage);
6064 } else if (srcLinear.isOpaque()) {
6065 rgbBlendPixel(dst, coverage, srcLinear, colorProfile);
6066 } else {
6067 // First do naive blend with text-color
6068 QRgb s = *dst;
6069 blend_pixel(s, src);
6070 // Then gamma-corrected blend with glyph shape
6071 QRgba64 s64 = colorProfile ? colorProfile->toLinear64(s) : QRgba64::fromArgb32(s);
6072 rgbBlendPixel(dst, coverage, s64, colorProfile);
6073 }
6074 }
6075
6076 #if QT_CONFIG(raster_64bit)
rgbBlendPixel(QRgba64 & dst,int coverage,QRgba64 slinear,const QColorTrcLut * colorProfile)6077 static inline void rgbBlendPixel(QRgba64 &dst, int coverage, QRgba64 slinear, const QColorTrcLut *colorProfile)
6078 {
6079 // Do a gammacorrected RGB alphablend...
6080 const QRgba64 dlinear = colorProfile ? colorProfile->toLinear64(dst) : dst;
6081
6082 QRgba64 blend = rgbBlend(dlinear, slinear, coverage);
6083
6084 dst = colorProfile ? colorProfile->fromLinear(blend) : blend;
6085 }
6086
alphargbblend_generic(uint coverage,QRgba64 * dest,int x,const QRgba64 & srcLinear,const QRgba64 & src,const QColorTrcLut * colorProfile)6087 static inline void alphargbblend_generic(uint coverage, QRgba64 *dest, int x, const QRgba64 &srcLinear, const QRgba64 &src, const QColorTrcLut *colorProfile)
6088 {
6089 if (coverage == 0xff000000) {
6090 // nothing
6091 } else if (coverage == 0xffffffff) {
6092 blend_pixel(dest[x], src);
6093 } else if (!dest[x].isOpaque()) {
6094 // Do a gray alphablend.
6095 alphamapblend_generic(qRgbAvg(coverage), dest, x, srcLinear, src, colorProfile);
6096 } else if (src.isOpaque()) {
6097 rgbBlendPixel(dest[x], coverage, srcLinear, colorProfile);
6098 } else {
6099 // First do naive blend with text-color
6100 QRgba64 s = dest[x];
6101 blend_pixel(s, src);
6102 // Then gamma-corrected blend with glyph shape
6103 if (colorProfile)
6104 s = colorProfile->toLinear(s);
6105 rgbBlendPixel(dest[x], coverage, s, colorProfile);
6106 }
6107 }
6108
qt_alphargbblit_generic(QRasterBuffer * rasterBuffer,int x,int y,const QRgba64 & color,const uint * src,int mapWidth,int mapHeight,int srcStride,const QClipData * clip,bool useGammaCorrection)6109 static void qt_alphargbblit_generic(QRasterBuffer *rasterBuffer,
6110 int x, int y, const QRgba64 &color,
6111 const uint *src, int mapWidth, int mapHeight, int srcStride,
6112 const QClipData *clip, bool useGammaCorrection)
6113 {
6114 if (color.isTransparent())
6115 return;
6116
6117 const QColorTrcLut *colorProfile = nullptr;
6118
6119 if (useGammaCorrection)
6120 colorProfile = QGuiApplicationPrivate::instance()->colorProfileForA32Text();
6121
6122 QRgba64 srcColor = color;
6123 if (colorProfile && color.isOpaque())
6124 srcColor = colorProfile->toLinear(srcColor);
6125
6126 alignas(8) QRgba64 buffer[BufferSize];
6127 const DestFetchProc64 destFetch64 = destFetchProc64[rasterBuffer->format];
6128 const DestStoreProc64 destStore64 = destStoreProc64[rasterBuffer->format];
6129
6130 if (!clip) {
6131 for (int ly = 0; ly < mapHeight; ++ly) {
6132 int i = x;
6133 int length = mapWidth;
6134 while (length > 0) {
6135 int l = qMin(BufferSize, length);
6136 QRgba64 *dest = destFetch64(buffer, rasterBuffer, i, y + ly, l);
6137 for (int j=0; j < l; ++j) {
6138 const uint coverage = src[j + (i - x)];
6139 alphargbblend_generic(coverage, dest, j, srcColor, color, colorProfile);
6140 }
6141 if (destStore64)
6142 destStore64(rasterBuffer, i, y + ly, dest, l);
6143 length -= l;
6144 i += l;
6145 }
6146 src += srcStride;
6147 }
6148 } else {
6149 int bottom = qMin(y + mapHeight, rasterBuffer->height());
6150
6151 int top = qMax(y, 0);
6152 src += (top - y) * srcStride;
6153
6154 const_cast<QClipData *>(clip)->initialize();
6155 for (int yp = top; yp<bottom; ++yp) {
6156 const QClipData::ClipLine &line = clip->m_clipLines[yp];
6157
6158 for (int i=0; i<line.count; ++i) {
6159 const QSpan &clip = line.spans[i];
6160
6161 int start = qMax<int>(x, clip.x);
6162 int end = qMin<int>(x + mapWidth, clip.x + clip.len);
6163 if (end <= start)
6164 continue;
6165 Q_ASSERT(end - start <= BufferSize);
6166 QRgba64 *dest = destFetch64(buffer, rasterBuffer, start, clip.y, end - start);
6167
6168 for (int xp=start; xp<end; ++xp) {
6169 const uint coverage = src[xp - x];
6170 alphargbblend_generic(coverage, dest, xp - start, srcColor, color, colorProfile);
6171 }
6172 if (destStore64)
6173 destStore64(rasterBuffer, start, clip.y, dest, end - start);
6174 } // for (i -> line.count)
6175 src += srcStride;
6176 } // for (yp -> bottom)
6177 }
6178 }
6179 #else
qt_alphargbblit_generic(QRasterBuffer * rasterBuffer,int x,int y,const QRgba64 & color,const uint * src,int mapWidth,int mapHeight,int srcStride,const QClipData * clip,bool useGammaCorrection)6180 static void qt_alphargbblit_generic(QRasterBuffer *rasterBuffer,
6181 int x, int y, const QRgba64 &color,
6182 const uint *src, int mapWidth, int mapHeight, int srcStride,
6183 const QClipData *clip, bool useGammaCorrection)
6184 {
6185 if (color.isTransparent())
6186 return;
6187
6188 const quint32 c = color.toArgb32();
6189
6190 const QColorTrcLut *colorProfile = nullptr;
6191
6192 if (useGammaCorrection)
6193 colorProfile = QGuiApplicationPrivate::instance()->colorProfileForA32Text();
6194
6195 QRgba64 srcColor = color;
6196 if (colorProfile && color.isOpaque())
6197 srcColor = colorProfile->toLinear(srcColor);
6198
6199 quint32 buffer[BufferSize];
6200 const DestFetchProc destFetch = destFetchProc[rasterBuffer->format];
6201 const DestStoreProc destStore = destStoreProc[rasterBuffer->format];
6202
6203 if (!clip) {
6204 for (int ly = 0; ly < mapHeight; ++ly) {
6205 int i = x;
6206 int length = mapWidth;
6207 while (length > 0) {
6208 int l = qMin(BufferSize, length);
6209 quint32 *dest = destFetch(buffer, rasterBuffer, i, y + ly, l);
6210 for (int j=0; j < l; ++j) {
6211 const uint coverage = src[j + (i - x)];
6212 alphargbblend_argb32(dest + j, coverage, srcColor, c, colorProfile);
6213 }
6214 if (destStore)
6215 destStore(rasterBuffer, i, y + ly, dest, l);
6216 length -= l;
6217 i += l;
6218 }
6219 src += srcStride;
6220 }
6221 } else {
6222 int bottom = qMin(y + mapHeight, rasterBuffer->height());
6223
6224 int top = qMax(y, 0);
6225 src += (top - y) * srcStride;
6226
6227 const_cast<QClipData *>(clip)->initialize();
6228 for (int yp = top; yp<bottom; ++yp) {
6229 const QClipData::ClipLine &line = clip->m_clipLines[yp];
6230
6231 for (int i=0; i<line.count; ++i) {
6232 const QSpan &clip = line.spans[i];
6233
6234 int start = qMax<int>(x, clip.x);
6235 int end = qMin<int>(x + mapWidth, clip.x + clip.len);
6236 if (end <= start)
6237 continue;
6238 Q_ASSERT(end - start <= BufferSize);
6239 quint32 *dest = destFetch(buffer, rasterBuffer, start, clip.y, end - start);
6240
6241 for (int xp=start; xp<end; ++xp) {
6242 const uint coverage = src[xp - x];
6243 alphargbblend_argb32(dest + xp - start, coverage, srcColor, c, colorProfile);
6244 }
6245 if (destStore)
6246 destStore(rasterBuffer, start, clip.y, dest, end - start);
6247 } // for (i -> line.count)
6248 src += srcStride;
6249 } // for (yp -> bottom)
6250 }
6251 }
6252 #endif
6253
qt_alphargbblit_argb32(QRasterBuffer * rasterBuffer,int x,int y,const QRgba64 & color,const uint * src,int mapWidth,int mapHeight,int srcStride,const QClipData * clip,bool useGammaCorrection)6254 static void qt_alphargbblit_argb32(QRasterBuffer *rasterBuffer,
6255 int x, int y, const QRgba64 &color,
6256 const uint *src, int mapWidth, int mapHeight, int srcStride,
6257 const QClipData *clip, bool useGammaCorrection)
6258 {
6259 if (color.isTransparent())
6260 return;
6261
6262 const quint32 c = color.toArgb32();
6263
6264 const QColorTrcLut *colorProfile = nullptr;
6265
6266 if (useGammaCorrection)
6267 colorProfile = QGuiApplicationPrivate::instance()->colorProfileForA32Text();
6268
6269 QRgba64 srcColor = color;
6270 if (colorProfile && color.isOpaque())
6271 srcColor = colorProfile->toLinear(srcColor);
6272
6273 if (!clip) {
6274 quint32 *dst = reinterpret_cast<quint32*>(rasterBuffer->scanLine(y)) + x;
6275 const int destStride = rasterBuffer->stride<quint32>();
6276 while (mapHeight--) {
6277 for (int i = 0; i < mapWidth; ++i) {
6278 const uint coverage = src[i];
6279 alphargbblend_argb32(dst + i, coverage, srcColor, c, colorProfile);
6280 }
6281
6282 dst += destStride;
6283 src += srcStride;
6284 }
6285 } else {
6286 int bottom = qMin(y + mapHeight, rasterBuffer->height());
6287
6288 int top = qMax(y, 0);
6289 src += (top - y) * srcStride;
6290
6291 const_cast<QClipData *>(clip)->initialize();
6292 for (int yp = top; yp<bottom; ++yp) {
6293 const QClipData::ClipLine &line = clip->m_clipLines[yp];
6294
6295 quint32 *dst = reinterpret_cast<quint32 *>(rasterBuffer->scanLine(yp));
6296
6297 for (int i=0; i<line.count; ++i) {
6298 const QSpan &clip = line.spans[i];
6299
6300 int start = qMax<int>(x, clip.x);
6301 int end = qMin<int>(x + mapWidth, clip.x + clip.len);
6302
6303 for (int xp=start; xp<end; ++xp) {
6304 const uint coverage = src[xp - x];
6305 alphargbblend_argb32(dst + xp, coverage, srcColor, c, colorProfile);
6306 }
6307 } // for (i -> line.count)
6308 src += srcStride;
6309 } // for (yp -> bottom)
6310
6311 }
6312 }
6313
qt_rectfill_argb32(QRasterBuffer * rasterBuffer,int x,int y,int width,int height,const QRgba64 & color)6314 static void qt_rectfill_argb32(QRasterBuffer *rasterBuffer,
6315 int x, int y, int width, int height,
6316 const QRgba64 &color)
6317 {
6318 qt_rectfill<quint32>(reinterpret_cast<quint32 *>(rasterBuffer->buffer()),
6319 color.toArgb32(), x, y, width, height, rasterBuffer->bytesPerLine());
6320 }
6321
qt_rectfill_quint16(QRasterBuffer * rasterBuffer,int x,int y,int width,int height,const QRgba64 & color)6322 static void qt_rectfill_quint16(QRasterBuffer *rasterBuffer,
6323 int x, int y, int width, int height,
6324 const QRgba64 &color)
6325 {
6326 const QPixelLayout &layout = qPixelLayouts[rasterBuffer->format];
6327 quint32 c32 = color.toArgb32();
6328 quint16 c16;
6329 layout.storeFromARGB32PM(reinterpret_cast<uchar *>(&c16), &c32, 0, 1, nullptr, nullptr);
6330 qt_rectfill<quint16>(reinterpret_cast<quint16 *>(rasterBuffer->buffer()),
6331 c16, x, y, width, height, rasterBuffer->bytesPerLine());
6332 }
6333
qt_rectfill_quint24(QRasterBuffer * rasterBuffer,int x,int y,int width,int height,const QRgba64 & color)6334 static void qt_rectfill_quint24(QRasterBuffer *rasterBuffer,
6335 int x, int y, int width, int height,
6336 const QRgba64 &color)
6337 {
6338 const QPixelLayout &layout = qPixelLayouts[rasterBuffer->format];
6339 quint32 c32 = color.toArgb32();
6340 quint24 c24;
6341 layout.storeFromARGB32PM(reinterpret_cast<uchar *>(&c24), &c32, 0, 1, nullptr, nullptr);
6342 qt_rectfill<quint24>(reinterpret_cast<quint24 *>(rasterBuffer->buffer()),
6343 c24, x, y, width, height, rasterBuffer->bytesPerLine());
6344 }
6345
qt_rectfill_nonpremul_argb32(QRasterBuffer * rasterBuffer,int x,int y,int width,int height,const QRgba64 & color)6346 static void qt_rectfill_nonpremul_argb32(QRasterBuffer *rasterBuffer,
6347 int x, int y, int width, int height,
6348 const QRgba64 &color)
6349 {
6350 qt_rectfill<quint32>(reinterpret_cast<quint32 *>(rasterBuffer->buffer()),
6351 color.unpremultiplied().toArgb32(), x, y, width, height, rasterBuffer->bytesPerLine());
6352 }
6353
qt_rectfill_rgba(QRasterBuffer * rasterBuffer,int x,int y,int width,int height,const QRgba64 & color)6354 static void qt_rectfill_rgba(QRasterBuffer *rasterBuffer,
6355 int x, int y, int width, int height,
6356 const QRgba64 &color)
6357 {
6358 qt_rectfill<quint32>(reinterpret_cast<quint32 *>(rasterBuffer->buffer()),
6359 ARGB2RGBA(color.toArgb32()), x, y, width, height, rasterBuffer->bytesPerLine());
6360 }
6361
qt_rectfill_nonpremul_rgba(QRasterBuffer * rasterBuffer,int x,int y,int width,int height,const QRgba64 & color)6362 static void qt_rectfill_nonpremul_rgba(QRasterBuffer *rasterBuffer,
6363 int x, int y, int width, int height,
6364 const QRgba64 &color)
6365 {
6366 qt_rectfill<quint32>(reinterpret_cast<quint32 *>(rasterBuffer->buffer()),
6367 ARGB2RGBA(color.unpremultiplied().toArgb32()), x, y, width, height, rasterBuffer->bytesPerLine());
6368 }
6369
6370 template<QtPixelOrder PixelOrder>
qt_rectfill_rgb30(QRasterBuffer * rasterBuffer,int x,int y,int width,int height,const QRgba64 & color)6371 static void qt_rectfill_rgb30(QRasterBuffer *rasterBuffer,
6372 int x, int y, int width, int height,
6373 const QRgba64 &color)
6374 {
6375 qt_rectfill<quint32>(reinterpret_cast<quint32 *>(rasterBuffer->buffer()),
6376 qConvertRgb64ToRgb30<PixelOrder>(color), x, y, width, height, rasterBuffer->bytesPerLine());
6377 }
6378
qt_rectfill_alpha(QRasterBuffer * rasterBuffer,int x,int y,int width,int height,const QRgba64 & color)6379 static void qt_rectfill_alpha(QRasterBuffer *rasterBuffer,
6380 int x, int y, int width, int height,
6381 const QRgba64 &color)
6382 {
6383 qt_rectfill<quint8>(reinterpret_cast<quint8 *>(rasterBuffer->buffer()),
6384 color.alpha() >> 8, x, y, width, height, rasterBuffer->bytesPerLine());
6385 }
6386
qt_rectfill_gray(QRasterBuffer * rasterBuffer,int x,int y,int width,int height,const QRgba64 & color)6387 static void qt_rectfill_gray(QRasterBuffer *rasterBuffer,
6388 int x, int y, int width, int height,
6389 const QRgba64 &color)
6390 {
6391 qt_rectfill<quint8>(reinterpret_cast<quint8 *>(rasterBuffer->buffer()),
6392 qGray(color.toArgb32()), x, y, width, height, rasterBuffer->bytesPerLine());
6393 }
6394
qt_rectfill_quint64(QRasterBuffer * rasterBuffer,int x,int y,int width,int height,const QRgba64 & color)6395 static void qt_rectfill_quint64(QRasterBuffer *rasterBuffer,
6396 int x, int y, int width, int height,
6397 const QRgba64 &color)
6398 {
6399 const auto store = qStoreFromRGBA64PM[rasterBuffer->format];
6400 quint64 c64;
6401 store(reinterpret_cast<uchar *>(&c64), &color, 0, 1, nullptr, nullptr);
6402 qt_rectfill<quint64>(reinterpret_cast<quint64 *>(rasterBuffer->buffer()),
6403 c64, x, y, width, height, rasterBuffer->bytesPerLine());
6404 }
6405
6406 // Map table for destination image format. Contains function pointers
6407 // for blends of various types unto the destination
6408
6409 DrawHelper qDrawHelper[QImage::NImageFormats] =
6410 {
6411 // Format_Invalid,
6412 { nullptr, nullptr, nullptr, nullptr, nullptr },
6413 // Format_Mono,
6414 {
6415 blend_color_generic,
6416 nullptr, nullptr, nullptr, nullptr
6417 },
6418 // Format_MonoLSB,
6419 {
6420 blend_color_generic,
6421 nullptr, nullptr, nullptr, nullptr
6422 },
6423 // Format_Indexed8,
6424 {
6425 blend_color_generic,
6426 nullptr, nullptr, nullptr, nullptr
6427 },
6428 // Format_RGB32,
6429 {
6430 blend_color_argb,
6431 qt_bitmapblit_argb32,
6432 qt_alphamapblit_argb32,
6433 qt_alphargbblit_argb32,
6434 qt_rectfill_argb32
6435 },
6436 // Format_ARGB32,
6437 {
6438 blend_color_generic,
6439 qt_bitmapblit_argb32,
6440 qt_alphamapblit_argb32,
6441 qt_alphargbblit_argb32,
6442 qt_rectfill_nonpremul_argb32
6443 },
6444 // Format_ARGB32_Premultiplied
6445 {
6446 blend_color_argb,
6447 qt_bitmapblit_argb32,
6448 qt_alphamapblit_argb32,
6449 qt_alphargbblit_argb32,
6450 qt_rectfill_argb32
6451 },
6452 // Format_RGB16
6453 {
6454 blend_color_rgb16,
6455 qt_bitmapblit_quint16,
6456 qt_alphamapblit_quint16,
6457 qt_alphargbblit_generic,
6458 qt_rectfill_quint16
6459 },
6460 // Format_ARGB8565_Premultiplied
6461 {
6462 blend_color_generic,
6463 nullptr,
6464 qt_alphamapblit_generic,
6465 qt_alphargbblit_generic,
6466 qt_rectfill_quint24
6467 },
6468 // Format_RGB666
6469 {
6470 blend_color_generic,
6471 nullptr,
6472 qt_alphamapblit_generic,
6473 qt_alphargbblit_generic,
6474 qt_rectfill_quint24
6475 },
6476 // Format_ARGB6666_Premultiplied
6477 {
6478 blend_color_generic,
6479 nullptr,
6480 qt_alphamapblit_generic,
6481 qt_alphargbblit_generic,
6482 qt_rectfill_quint24
6483 },
6484 // Format_RGB555
6485 {
6486 blend_color_generic,
6487 nullptr,
6488 qt_alphamapblit_generic,
6489 qt_alphargbblit_generic,
6490 qt_rectfill_quint16
6491 },
6492 // Format_ARGB8555_Premultiplied
6493 {
6494 blend_color_generic,
6495 nullptr,
6496 qt_alphamapblit_generic,
6497 qt_alphargbblit_generic,
6498 qt_rectfill_quint24
6499 },
6500 // Format_RGB888
6501 {
6502 blend_color_generic,
6503 nullptr,
6504 qt_alphamapblit_generic,
6505 qt_alphargbblit_generic,
6506 qt_rectfill_quint24
6507 },
6508 // Format_RGB444
6509 {
6510 blend_color_generic,
6511 nullptr,
6512 qt_alphamapblit_generic,
6513 qt_alphargbblit_generic,
6514 qt_rectfill_quint16
6515 },
6516 // Format_ARGB4444_Premultiplied
6517 {
6518 blend_color_generic,
6519 nullptr,
6520 qt_alphamapblit_generic,
6521 qt_alphargbblit_generic,
6522 qt_rectfill_quint16
6523 },
6524 // Format_RGBX8888
6525 {
6526 blend_color_generic,
6527 qt_bitmapblit_rgba8888,
6528 qt_alphamapblit_generic,
6529 qt_alphargbblit_generic,
6530 qt_rectfill_rgba
6531 },
6532 // Format_RGBA8888
6533 {
6534 blend_color_generic,
6535 qt_bitmapblit_rgba8888,
6536 qt_alphamapblit_generic,
6537 qt_alphargbblit_generic,
6538 qt_rectfill_nonpremul_rgba
6539 },
6540 // Format_RGB8888_Premultiplied
6541 {
6542 blend_color_generic,
6543 qt_bitmapblit_rgba8888,
6544 qt_alphamapblit_generic,
6545 qt_alphargbblit_generic,
6546 qt_rectfill_rgba
6547 },
6548 // Format_BGR30
6549 {
6550 blend_color_generic_rgb64,
6551 qt_bitmapblit_rgb30<PixelOrderBGR>,
6552 qt_alphamapblit_generic,
6553 qt_alphargbblit_generic,
6554 qt_rectfill_rgb30<PixelOrderBGR>
6555 },
6556 // Format_A2BGR30_Premultiplied
6557 {
6558 blend_color_generic_rgb64,
6559 qt_bitmapblit_rgb30<PixelOrderBGR>,
6560 qt_alphamapblit_generic,
6561 qt_alphargbblit_generic,
6562 qt_rectfill_rgb30<PixelOrderBGR>
6563 },
6564 // Format_RGB30
6565 {
6566 blend_color_generic_rgb64,
6567 qt_bitmapblit_rgb30<PixelOrderRGB>,
6568 qt_alphamapblit_generic,
6569 qt_alphargbblit_generic,
6570 qt_rectfill_rgb30<PixelOrderRGB>
6571 },
6572 // Format_A2RGB30_Premultiplied
6573 {
6574 blend_color_generic_rgb64,
6575 qt_bitmapblit_rgb30<PixelOrderRGB>,
6576 qt_alphamapblit_generic,
6577 qt_alphargbblit_generic,
6578 qt_rectfill_rgb30<PixelOrderRGB>
6579 },
6580 // Format_Alpha8
6581 {
6582 blend_color_generic,
6583 nullptr,
6584 qt_alphamapblit_generic,
6585 qt_alphargbblit_generic,
6586 qt_rectfill_alpha
6587 },
6588 // Format_Grayscale8
6589 {
6590 blend_color_generic,
6591 nullptr,
6592 qt_alphamapblit_generic,
6593 qt_alphargbblit_generic,
6594 qt_rectfill_gray
6595 },
6596 // Format_RGBX64
6597 {
6598 blend_color_generic_rgb64,
6599 nullptr,
6600 qt_alphamapblit_generic,
6601 qt_alphargbblit_generic,
6602 qt_rectfill_quint64
6603 },
6604 // Format_RGBA64
6605 {
6606 blend_color_generic_rgb64,
6607 nullptr,
6608 qt_alphamapblit_generic,
6609 qt_alphargbblit_generic,
6610 qt_rectfill_quint64
6611 },
6612 // Format_RGBA64_Premultiplied
6613 {
6614 blend_color_generic_rgb64,
6615 nullptr,
6616 qt_alphamapblit_generic,
6617 qt_alphargbblit_generic,
6618 qt_rectfill_quint64
6619 },
6620 // Format_Grayscale16
6621 {
6622 blend_color_generic_rgb64,
6623 nullptr,
6624 qt_alphamapblit_generic,
6625 qt_alphargbblit_generic,
6626 qt_rectfill_quint16
6627 },
6628 // Format_BGR888
6629 {
6630 blend_color_generic,
6631 nullptr,
6632 qt_alphamapblit_generic,
6633 qt_alphargbblit_generic,
6634 qt_rectfill_quint24
6635 },
6636 };
6637
#ifndef __SSE2__
// Plain C++ qt_memfill64, compiled only when SSE2 is not available at build
// time: forwards to the generic qt_memfill_template<quint64>().
void qt_memfill64(quint64 *dest, quint64 color, qsizetype count)
{
    qt_memfill_template<quint64>(dest, color, count);
}
#endif // !__SSE2__
6644
6645 #if defined(QT_COMPILER_SUPPORTS_SSSE3) && defined(Q_CC_GNU) && !defined(Q_CC_INTEL) && !defined(Q_CC_CLANG)
6646 __attribute__((optimize("no-tree-vectorize")))
6647 #endif
qt_memfill24(quint24 * dest,quint24 color,qsizetype count)6648 void qt_memfill24(quint24 *dest, quint24 color, qsizetype count)
6649 {
6650 # ifdef QT_COMPILER_SUPPORTS_SSSE3
6651 extern void qt_memfill24_ssse3(quint24 *, quint24, qsizetype);
6652 if (qCpuHasFeature(SSSE3))
6653 return qt_memfill24_ssse3(dest, color, count);
6654 # endif
6655
6656 const quint32 v = color;
6657 quint24 *end = dest + count;
6658
6659 // prolog: align dest to 32bit
6660 while ((quintptr(dest) & 0x3) && dest < end) {
6661 *dest++ = v;
6662 }
6663 if (dest >= end)
6664 return;
6665
6666 const uint val1 = qFromBigEndian((v << 8) | (v >> 16));
6667 const uint val2 = qFromBigEndian((v << 16) | (v >> 8));
6668 const uint val3 = qFromBigEndian((v << 24) | (v >> 0));
6669
6670 for ( ; dest <= (end - 4); dest += 4) {
6671 quint32 *dst = reinterpret_cast<quint32 *>(dest);
6672 dst[0] = val1;
6673 dst[1] = val2;
6674 dst[2] = val3;
6675 }
6676
6677 // less than 4px left
6678 switch (end - dest) {
6679 case 3:
6680 *dest++ = v;
6681 Q_FALLTHROUGH();
6682 case 2:
6683 *dest++ = v;
6684 Q_FALLTHROUGH();
6685 case 1:
6686 *dest++ = v;
6687 }
6688 }
6689
qt_memfill16(quint16 * dest,quint16 value,qsizetype count)6690 void qt_memfill16(quint16 *dest, quint16 value, qsizetype count)
6691 {
6692 const int align = quintptr(dest) & 0x3;
6693 if (align) {
6694 *dest++ = value;
6695 --count;
6696 }
6697
6698 if (count & 0x1)
6699 dest[count - 1] = value;
6700
6701 const quint32 value32 = (value << 16) | value;
6702 qt_memfill32(reinterpret_cast<quint32*>(dest), value32, count / 2);
6703 }
6704
#if !(defined(__SSE2__) || defined(__ARM_NEON__) || defined(__MIPS_DSP__))
// Plain C++ qt_memfill32, compiled only when none of SSE2, NEON or MIPS DSP
// is available at build time: forwards to qt_memfill_template<quint32>().
void qt_memfill32(quint32 *dest, quint32 color, qsizetype count)
{
    qt_memfill_template<quint32>(dest, color, count);
}
#endif // no SSE2 / NEON / MIPS DSP
6711 #ifdef __SSE2__
6712 decltype(qt_memfill32_sse2) *qt_memfill32 = nullptr;
6713 decltype(qt_memfill64_sse2) *qt_memfill64 = nullptr;
6714 #endif
6715
#if defined(QT_COMPILER_SUPPORTS_SSE4_1)
// SSE4.1 store routine, declared here and installed into qPixelLayouts by
// qInitDrawhelperFunctions() for the A2BGR30/A2RGB30 premultiplied formats.
template<QtPixelOrder PixelOrder>
void QT_FASTCALL storeA2RGB30PMFromARGB32PM_sse4(uchar *dest, const uint *src,
                                                 int index, int count,
                                                 const QVector<QRgb> *, QDitherInfo *);
#endif // QT_COMPILER_SUPPORTS_SSE4_1

// Declared here, called first thing in qInitDrawhelperFunctions().
extern void qInitBlendFunctions();
6721
qInitDrawhelperFunctions()6722 static void qInitDrawhelperFunctions()
6723 {
6724 // Set up basic blend function tables.
6725 qInitBlendFunctions();
6726
6727 #ifdef __SSE2__
6728 # ifndef __AVX2__
6729 qt_memfill32 = qt_memfill32_sse2;
6730 qt_memfill64 = qt_memfill64_sse2;
6731 # endif
6732 qDrawHelper[QImage::Format_RGB32].bitmapBlit = qt_bitmapblit32_sse2;
6733 qDrawHelper[QImage::Format_ARGB32].bitmapBlit = qt_bitmapblit32_sse2;
6734 qDrawHelper[QImage::Format_ARGB32_Premultiplied].bitmapBlit = qt_bitmapblit32_sse2;
6735 qDrawHelper[QImage::Format_RGB16].bitmapBlit = qt_bitmapblit16_sse2;
6736 qDrawHelper[QImage::Format_RGBX8888].bitmapBlit = qt_bitmapblit8888_sse2;
6737 qDrawHelper[QImage::Format_RGBA8888].bitmapBlit = qt_bitmapblit8888_sse2;
6738 qDrawHelper[QImage::Format_RGBA8888_Premultiplied].bitmapBlit = qt_bitmapblit8888_sse2;
6739
6740 extern void qt_scale_image_argb32_on_argb32_sse2(uchar *destPixels, int dbpl,
6741 const uchar *srcPixels, int sbpl, int srch,
6742 const QRectF &targetRect,
6743 const QRectF &sourceRect,
6744 const QRect &clip,
6745 int const_alpha);
6746 qScaleFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_scale_image_argb32_on_argb32_sse2;
6747 qScaleFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_scale_image_argb32_on_argb32_sse2;
6748 qScaleFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBA8888_Premultiplied] = qt_scale_image_argb32_on_argb32_sse2;
6749 qScaleFunctions[QImage::Format_RGBX8888][QImage::Format_RGBA8888_Premultiplied] = qt_scale_image_argb32_on_argb32_sse2;
6750
6751 extern void qt_blend_rgb32_on_rgb32_sse2(uchar *destPixels, int dbpl,
6752 const uchar *srcPixels, int sbpl,
6753 int w, int h,
6754 int const_alpha);
6755 extern void qt_blend_argb32_on_argb32_sse2(uchar *destPixels, int dbpl,
6756 const uchar *srcPixels, int sbpl,
6757 int w, int h,
6758 int const_alpha);
6759
6760 qBlendFunctions[QImage::Format_RGB32][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_sse2;
6761 qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_sse2;
6762 qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_sse2;
6763 qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_sse2;
6764 qBlendFunctions[QImage::Format_RGBX8888][QImage::Format_RGBX8888] = qt_blend_rgb32_on_rgb32_sse2;
6765 qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBX8888] = qt_blend_rgb32_on_rgb32_sse2;
6766 qBlendFunctions[QImage::Format_RGBX8888][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_sse2;
6767 qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_sse2;
6768
6769 extern const uint * QT_FASTCALL qt_fetch_radial_gradient_sse2(uint *buffer, const Operator *op, const QSpanData *data,
6770 int y, int x, int length);
6771
6772 qt_fetch_radial_gradient = qt_fetch_radial_gradient_sse2;
6773
6774 extern void QT_FASTCALL comp_func_SourceOver_sse2(uint *destPixels, const uint *srcPixels, int length, uint const_alpha);
6775 extern void QT_FASTCALL comp_func_solid_SourceOver_sse2(uint *destPixels, int length, uint color, uint const_alpha);
6776 extern void QT_FASTCALL comp_func_Source_sse2(uint *destPixels, const uint *srcPixels, int length, uint const_alpha);
6777 extern void QT_FASTCALL comp_func_solid_Source_sse2(uint *destPixels, int length, uint color, uint const_alpha);
6778 extern void QT_FASTCALL comp_func_Plus_sse2(uint *destPixels, const uint *srcPixels, int length, uint const_alpha);
6779 qt_functionForMode_C[QPainter::CompositionMode_SourceOver] = comp_func_SourceOver_sse2;
6780 qt_functionForModeSolid_C[QPainter::CompositionMode_SourceOver] = comp_func_solid_SourceOver_sse2;
6781 qt_functionForMode_C[QPainter::CompositionMode_Source] = comp_func_Source_sse2;
6782 qt_functionForModeSolid_C[QPainter::CompositionMode_Source] = comp_func_solid_Source_sse2;
6783 qt_functionForMode_C[QPainter::CompositionMode_Plus] = comp_func_Plus_sse2;
6784
6785 #ifdef QT_COMPILER_SUPPORTS_SSSE3
6786 if (qCpuHasFeature(SSSE3)) {
6787 extern void qt_blend_argb32_on_argb32_ssse3(uchar *destPixels, int dbpl,
6788 const uchar *srcPixels, int sbpl,
6789 int w, int h,
6790 int const_alpha);
6791
6792 extern const uint * QT_FASTCALL qt_fetchUntransformed_888_ssse3(uint *buffer, const Operator *, const QSpanData *data,
6793 int y, int x, int length);
6794 qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_ssse3;
6795 qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_ssse3;
6796 qBlendFunctions[QImage::Format_RGBX8888][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_ssse3;
6797 qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_ssse3;
6798 sourceFetchUntransformed[QImage::Format_RGB888] = qt_fetchUntransformed_888_ssse3;
6799 extern void QT_FASTCALL rbSwap_888_ssse3(uchar *dst, const uchar *src, int count);
6800 qPixelLayouts[QImage::Format_RGB888].rbSwap = rbSwap_888_ssse3;
6801 qPixelLayouts[QImage::Format_BGR888].rbSwap = rbSwap_888_ssse3;
6802 }
6803 #endif // SSSE3
6804
6805 #if defined(QT_COMPILER_SUPPORTS_SSE4_1)
6806 if (qCpuHasFeature(SSE4_1)) {
6807 extern void QT_FASTCALL convertARGB32ToARGB32PM_sse4(uint *buffer, int count, const QVector<QRgb> *);
6808 extern void QT_FASTCALL convertRGBA8888ToARGB32PM_sse4(uint *buffer, int count, const QVector<QRgb> *);
6809 extern const uint *QT_FASTCALL fetchARGB32ToARGB32PM_sse4(uint *buffer, const uchar *src, int index, int count,
6810 const QVector<QRgb> *, QDitherInfo *);
6811 extern const uint *QT_FASTCALL fetchRGBA8888ToARGB32PM_sse4(uint *buffer, const uchar *src, int index, int count,
6812 const QVector<QRgb> *, QDitherInfo *);
6813 extern const QRgba64 * QT_FASTCALL convertARGB32ToRGBA64PM_sse4(QRgba64 *buffer, const uint *src, int count,
6814 const QVector<QRgb> *, QDitherInfo *);
6815 extern const QRgba64 * QT_FASTCALL convertRGBA8888ToRGBA64PM_sse4(QRgba64 *buffer, const uint *src, int count,
6816 const QVector<QRgb> *, QDitherInfo *);
6817 extern const QRgba64 *QT_FASTCALL fetchARGB32ToRGBA64PM_sse4(QRgba64 *buffer, const uchar *src, int index, int count,
6818 const QVector<QRgb> *, QDitherInfo *);
6819 extern const QRgba64 *QT_FASTCALL fetchRGBA8888ToRGBA64PM_sse4(QRgba64 *buffer, const uchar *src, int index, int count,
6820 const QVector<QRgb> *, QDitherInfo *);
6821 extern void QT_FASTCALL storeARGB32FromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count,
6822 const QVector<QRgb> *, QDitherInfo *);
6823 extern void QT_FASTCALL storeRGBA8888FromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count,
6824 const QVector<QRgb> *, QDitherInfo *);
6825 extern void QT_FASTCALL storeRGBXFromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count,
6826 const QVector<QRgb> *, QDitherInfo *);
6827 extern void QT_FASTCALL storeARGB32FromRGBA64PM_sse4(uchar *dest, const QRgba64 *src, int index, int count,
6828 const QVector<QRgb> *, QDitherInfo *);
6829 extern void QT_FASTCALL storeRGBA8888FromRGBA64PM_sse4(uchar *dest, const QRgba64 *src, int index, int count,
6830 const QVector<QRgb> *, QDitherInfo *);
6831 extern void QT_FASTCALL destStore64ARGB32_sse4(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length);
6832 extern void QT_FASTCALL destStore64RGBA8888_sse4(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length);
6833 # ifndef __AVX2__
6834 qPixelLayouts[QImage::Format_ARGB32].fetchToARGB32PM = fetchARGB32ToARGB32PM_sse4;
6835 qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_sse4;
6836 qPixelLayouts[QImage::Format_RGBA8888].fetchToARGB32PM = fetchRGBA8888ToARGB32PM_sse4;
6837 qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_sse4;
6838 qPixelLayouts[QImage::Format_ARGB32].fetchToRGBA64PM = fetchARGB32ToRGBA64PM_sse4;
6839 qPixelLayouts[QImage::Format_ARGB32].convertToRGBA64PM = convertARGB32ToRGBA64PM_sse4;
6840 qPixelLayouts[QImage::Format_RGBA8888].fetchToRGBA64PM = fetchRGBA8888ToRGBA64PM_sse4;
6841 qPixelLayouts[QImage::Format_RGBA8888].convertToRGBA64PM = convertRGBA8888ToRGBA64PM_sse4;
6842 qPixelLayouts[QImage::Format_RGBX8888].fetchToRGBA64PM = fetchRGBA8888ToRGBA64PM_sse4;
6843 qPixelLayouts[QImage::Format_RGBX8888].convertToRGBA64PM = convertRGBA8888ToRGBA64PM_sse4;
6844 # endif
6845 qPixelLayouts[QImage::Format_ARGB32].storeFromARGB32PM = storeARGB32FromARGB32PM_sse4;
6846 qPixelLayouts[QImage::Format_RGBA8888].storeFromARGB32PM = storeRGBA8888FromARGB32PM_sse4;
6847 qPixelLayouts[QImage::Format_RGBX8888].storeFromARGB32PM = storeRGBXFromARGB32PM_sse4;
6848 qPixelLayouts[QImage::Format_A2BGR30_Premultiplied].storeFromARGB32PM = storeA2RGB30PMFromARGB32PM_sse4<PixelOrderBGR>;
6849 qPixelLayouts[QImage::Format_A2RGB30_Premultiplied].storeFromARGB32PM = storeA2RGB30PMFromARGB32PM_sse4<PixelOrderRGB>;
6850 qStoreFromRGBA64PM[QImage::Format_ARGB32] = storeARGB32FromRGBA64PM_sse4;
6851 qStoreFromRGBA64PM[QImage::Format_RGBA8888] = storeRGBA8888FromRGBA64PM_sse4;
6852 #if QT_CONFIG(raster_64bit)
6853 destStoreProc64[QImage::Format_ARGB32] = destStore64ARGB32_sse4;
6854 destStoreProc64[QImage::Format_RGBA8888] = destStore64RGBA8888_sse4;
6855 #endif
6856 }
6857 #endif
6858
6859 #if defined(QT_COMPILER_SUPPORTS_AVX2)
6860 if (qCpuHasFeature(ArchHaswell)) {
6861 qt_memfill32 = qt_memfill32_avx2;
6862 qt_memfill64 = qt_memfill64_avx2;
6863 extern void qt_blend_rgb32_on_rgb32_avx2(uchar *destPixels, int dbpl,
6864 const uchar *srcPixels, int sbpl,
6865 int w, int h, int const_alpha);
6866 extern void qt_blend_argb32_on_argb32_avx2(uchar *destPixels, int dbpl,
6867 const uchar *srcPixels, int sbpl,
6868 int w, int h, int const_alpha);
6869 qBlendFunctions[QImage::Format_RGB32][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_avx2;
6870 qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_avx2;
6871 qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_avx2;
6872 qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_avx2;
6873 qBlendFunctions[QImage::Format_RGBX8888][QImage::Format_RGBX8888] = qt_blend_rgb32_on_rgb32_avx2;
6874 qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBX8888] = qt_blend_rgb32_on_rgb32_avx2;
6875 qBlendFunctions[QImage::Format_RGBX8888][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_avx2;
6876 qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_avx2;
6877
6878 extern void QT_FASTCALL comp_func_Source_avx2(uint *destPixels, const uint *srcPixels, int length, uint const_alpha);
6879 extern void QT_FASTCALL comp_func_SourceOver_avx2(uint *destPixels, const uint *srcPixels, int length, uint const_alpha);
6880 extern void QT_FASTCALL comp_func_solid_SourceOver_avx2(uint *destPixels, int length, uint color, uint const_alpha);
6881 qt_functionForMode_C[QPainter::CompositionMode_Source] = comp_func_Source_avx2;
6882 qt_functionForMode_C[QPainter::CompositionMode_SourceOver] = comp_func_SourceOver_avx2;
6883 qt_functionForModeSolid_C[QPainter::CompositionMode_SourceOver] = comp_func_solid_SourceOver_avx2;
6884 #if QT_CONFIG(raster_64bit)
6885 extern void QT_FASTCALL comp_func_Source_rgb64_avx2(QRgba64 *destPixels, const QRgba64 *srcPixels, int length, uint const_alpha);
6886 extern void QT_FASTCALL comp_func_SourceOver_rgb64_avx2(QRgba64 *destPixels, const QRgba64 *srcPixels, int length, uint const_alpha);
6887 extern void QT_FASTCALL comp_func_solid_SourceOver_rgb64_avx2(QRgba64 *destPixels, int length, QRgba64 color, uint const_alpha);
6888 qt_functionForMode64_C[QPainter::CompositionMode_Source] = comp_func_Source_rgb64_avx2;
6889 qt_functionForMode64_C[QPainter::CompositionMode_SourceOver] = comp_func_SourceOver_rgb64_avx2;
6890 qt_functionForModeSolid64_C[QPainter::CompositionMode_SourceOver] = comp_func_solid_SourceOver_rgb64_avx2;
6891 #endif
6892
6893 extern void QT_FASTCALL fetchTransformedBilinearARGB32PM_simple_scale_helper_avx2(uint *b, uint *end, const QTextureData &image,
6894 int &fx, int &fy, int fdx, int /*fdy*/);
6895 extern void QT_FASTCALL fetchTransformedBilinearARGB32PM_downscale_helper_avx2(uint *b, uint *end, const QTextureData &image,
6896 int &fx, int &fy, int fdx, int /*fdy*/);
6897 extern void QT_FASTCALL fetchTransformedBilinearARGB32PM_fast_rotate_helper_avx2(uint *b, uint *end, const QTextureData &image,
6898 int &fx, int &fy, int fdx, int fdy);
6899
6900 bilinearFastTransformHelperARGB32PM[0][SimpleScaleTransform] = fetchTransformedBilinearARGB32PM_simple_scale_helper_avx2;
6901 bilinearFastTransformHelperARGB32PM[0][DownscaleTransform] = fetchTransformedBilinearARGB32PM_downscale_helper_avx2;
6902 bilinearFastTransformHelperARGB32PM[0][FastRotateTransform] = fetchTransformedBilinearARGB32PM_fast_rotate_helper_avx2;
6903
6904 extern void QT_FASTCALL convertARGB32ToARGB32PM_avx2(uint *buffer, int count, const QVector<QRgb> *);
6905 extern void QT_FASTCALL convertRGBA8888ToARGB32PM_avx2(uint *buffer, int count, const QVector<QRgb> *);
6906 extern const uint *QT_FASTCALL fetchARGB32ToARGB32PM_avx2(uint *buffer, const uchar *src, int index, int count,
6907 const QVector<QRgb> *, QDitherInfo *);
6908 extern const uint *QT_FASTCALL fetchRGBA8888ToARGB32PM_avx2(uint *buffer, const uchar *src, int index, int count,
6909 const QVector<QRgb> *, QDitherInfo *);
6910 qPixelLayouts[QImage::Format_ARGB32].fetchToARGB32PM = fetchARGB32ToARGB32PM_avx2;
6911 qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_avx2;
6912 qPixelLayouts[QImage::Format_RGBA8888].fetchToARGB32PM = fetchRGBA8888ToARGB32PM_avx2;
6913 qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_avx2;
6914
6915 #if QT_CONFIG(raster_64bit)
6916 extern const QRgba64 * QT_FASTCALL convertARGB32ToRGBA64PM_avx2(QRgba64 *, const uint *, int, const QVector<QRgb> *, QDitherInfo *);
6917 extern const QRgba64 * QT_FASTCALL convertRGBA8888ToRGBA64PM_avx2(QRgba64 *, const uint *, int count, const QVector<QRgb> *, QDitherInfo *);
6918 extern const QRgba64 *QT_FASTCALL fetchARGB32ToRGBA64PM_avx2(QRgba64 *, const uchar *, int, int, const QVector<QRgb> *, QDitherInfo *);
6919 extern const QRgba64 *QT_FASTCALL fetchRGBA8888ToRGBA64PM_avx2(QRgba64 *, const uchar *, int, int, const QVector<QRgb> *, QDitherInfo *);
6920 qPixelLayouts[QImage::Format_ARGB32].convertToRGBA64PM = convertARGB32ToRGBA64PM_avx2;
6921 qPixelLayouts[QImage::Format_RGBX8888].convertToRGBA64PM = convertRGBA8888ToRGBA64PM_avx2;
6922 qPixelLayouts[QImage::Format_ARGB32].fetchToRGBA64PM = fetchARGB32ToRGBA64PM_avx2;
6923 qPixelLayouts[QImage::Format_RGBX8888].fetchToRGBA64PM = fetchRGBA8888ToRGBA64PM_avx2;
6924 #endif
6925 }
6926 #endif
6927
6928 #endif // SSE2
6929
6930 #if defined(__ARM_NEON__)
6931 qBlendFunctions[QImage::Format_RGB32][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_neon;
6932 qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_neon;
6933 qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_neon;
6934 qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_neon;
6935 #if Q_BYTE_ORDER == Q_LITTLE_ENDIAN
6936 qBlendFunctions[QImage::Format_RGBX8888][QImage::Format_RGBX8888] = qt_blend_rgb32_on_rgb32_neon;
6937 qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBX8888] = qt_blend_rgb32_on_rgb32_neon;
6938 qBlendFunctions[QImage::Format_RGBX8888][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_neon;
6939 qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_neon;
6940 #endif
6941
6942 qt_functionForMode_C[QPainter::CompositionMode_SourceOver] = qt_blend_argb32_on_argb32_scanline_neon;
6943 qt_functionForModeSolid_C[QPainter::CompositionMode_SourceOver] = comp_func_solid_SourceOver_neon;
6944 qt_functionForMode_C[QPainter::CompositionMode_Plus] = comp_func_Plus_neon;
6945
6946 extern const uint * QT_FASTCALL qt_fetch_radial_gradient_neon(uint *buffer, const Operator *op, const QSpanData *data,
6947 int y, int x, int length);
6948
6949 qt_fetch_radial_gradient = qt_fetch_radial_gradient_neon;
6950
6951 sourceFetchUntransformed[QImage::Format_RGB888] = qt_fetchUntransformed_888_neon;
6952
6953 #if Q_BYTE_ORDER == Q_LITTLE_ENDIAN
6954 extern void QT_FASTCALL convertARGB32ToARGB32PM_neon(uint *buffer, int count, const QVector<QRgb> *);
6955 extern void QT_FASTCALL convertRGBA8888ToARGB32PM_neon(uint *buffer, int count, const QVector<QRgb> *);
6956 extern const uint *QT_FASTCALL fetchARGB32ToARGB32PM_neon(uint *buffer, const uchar *src, int index, int count,
6957 const QVector<QRgb> *, QDitherInfo *);
6958 extern const uint *QT_FASTCALL fetchRGBA8888ToARGB32PM_neon(uint *buffer, const uchar *src, int index, int count,
6959 const QVector<QRgb> *, QDitherInfo *);
6960 extern const QRgba64 * QT_FASTCALL convertARGB32ToRGBA64PM_neon(QRgba64 *buffer, const uint *src, int count,
6961 const QVector<QRgb> *, QDitherInfo *);
6962 extern const QRgba64 * QT_FASTCALL convertRGBA8888ToRGBA64PM_neon(QRgba64 *buffer, const uint *src, int count,
6963 const QVector<QRgb> *, QDitherInfo *);
6964 extern const QRgba64 *QT_FASTCALL fetchARGB32ToRGBA64PM_neon(QRgba64 *buffer, const uchar *src, int index, int count,
6965 const QVector<QRgb> *, QDitherInfo *);
6966 extern const QRgba64 *QT_FASTCALL fetchRGBA8888ToRGBA64PM_neon(QRgba64 *buffer, const uchar *src, int index, int count,
6967 const QVector<QRgb> *, QDitherInfo *);
6968 extern void QT_FASTCALL storeARGB32FromARGB32PM_neon(uchar *dest, const uint *src, int index, int count,
6969 const QVector<QRgb> *, QDitherInfo *);
6970 extern void QT_FASTCALL storeRGBA8888FromARGB32PM_neon(uchar *dest, const uint *src, int index, int count,
6971 const QVector<QRgb> *, QDitherInfo *);
6972 extern void QT_FASTCALL storeRGBXFromARGB32PM_neon(uchar *dest, const uint *src, int index, int count,
6973 const QVector<QRgb> *, QDitherInfo *);
6974 qPixelLayouts[QImage::Format_ARGB32].fetchToARGB32PM = fetchARGB32ToARGB32PM_neon;
6975 qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_neon;
6976 qPixelLayouts[QImage::Format_ARGB32].storeFromARGB32PM = storeARGB32FromARGB32PM_neon;
6977 qPixelLayouts[QImage::Format_ARGB32].fetchToRGBA64PM = fetchARGB32ToRGBA64PM_neon;
6978 qPixelLayouts[QImage::Format_ARGB32].convertToRGBA64PM = convertARGB32ToRGBA64PM_neon;
6979 qPixelLayouts[QImage::Format_RGBA8888].fetchToARGB32PM = fetchRGBA8888ToARGB32PM_neon;
6980 qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_neon;
6981 qPixelLayouts[QImage::Format_RGBA8888].storeFromARGB32PM = storeRGBA8888FromARGB32PM_neon;
6982 qPixelLayouts[QImage::Format_RGBA8888].fetchToRGBA64PM = fetchRGBA8888ToRGBA64PM_neon;
6983 qPixelLayouts[QImage::Format_RGBA8888].convertToRGBA64PM = convertRGBA8888ToRGBA64PM_neon;
6984 qPixelLayouts[QImage::Format_RGBX8888].storeFromARGB32PM = storeRGBXFromARGB32PM_neon;
6985 qPixelLayouts[QImage::Format_RGBX8888].fetchToRGBA64PM = fetchRGBA8888ToRGBA64PM_neon;
6986 qPixelLayouts[QImage::Format_RGBX8888].convertToRGBA64PM = convertRGBA8888ToRGBA64PM_neon;
6987 #endif
6988
6989 #if defined(ENABLE_PIXMAN_DRAWHELPERS)
6990 // The RGB16 helpers use Arm32 assembly that has not been ported to AArch64
6991 qBlendFunctions[QImage::Format_RGB16][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_rgb16_neon;
6992 qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB16] = qt_blend_rgb16_on_argb32_neon;
6993 qBlendFunctions[QImage::Format_RGB16][QImage::Format_RGB16] = qt_blend_rgb16_on_rgb16_neon;
6994
6995 qScaleFunctions[QImage::Format_RGB16][QImage::Format_ARGB32_Premultiplied] = qt_scale_image_argb32_on_rgb16_neon;
6996 qScaleFunctions[QImage::Format_RGB16][QImage::Format_RGB16] = qt_scale_image_rgb16_on_rgb16_neon;
6997
6998 qTransformFunctions[QImage::Format_RGB16][QImage::Format_ARGB32_Premultiplied] = qt_transform_image_argb32_on_rgb16_neon;
6999 qTransformFunctions[QImage::Format_RGB16][QImage::Format_RGB16] = qt_transform_image_rgb16_on_rgb16_neon;
7000
7001 qDrawHelper[QImage::Format_RGB16].alphamapBlit = qt_alphamapblit_quint16_neon;
7002
7003 destFetchProc[QImage::Format_RGB16] = qt_destFetchRGB16_neon;
7004 destStoreProc[QImage::Format_RGB16] = qt_destStoreRGB16_neon;
7005
7006 qMemRotateFunctions[QPixelLayout::BPP16][0] = qt_memrotate90_16_neon;
7007 qMemRotateFunctions[QPixelLayout::BPP16][2] = qt_memrotate270_16_neon;
7008 #endif
7009 #endif // defined(__ARM_NEON__)
7010
7011 #if defined(__MIPS_DSP__)
7012 // Composition functions are all DSP r1
7013 qt_functionForMode_C[QPainter::CompositionMode_SourceOver] = comp_func_SourceOver_asm_mips_dsp;
7014 qt_functionForMode_C[QPainter::CompositionMode_Source] = comp_func_Source_mips_dsp;
7015 qt_functionForMode_C[QPainter::CompositionMode_DestinationOver] = comp_func_DestinationOver_mips_dsp;
7016 qt_functionForMode_C[QPainter::CompositionMode_SourceIn] = comp_func_SourceIn_mips_dsp;
7017 qt_functionForMode_C[QPainter::CompositionMode_DestinationIn] = comp_func_DestinationIn_mips_dsp;
7018 qt_functionForMode_C[QPainter::CompositionMode_DestinationOut] = comp_func_DestinationOut_mips_dsp;
7019 qt_functionForMode_C[QPainter::CompositionMode_SourceAtop] = comp_func_SourceAtop_mips_dsp;
7020 qt_functionForMode_C[QPainter::CompositionMode_DestinationAtop] = comp_func_DestinationAtop_mips_dsp;
7021 qt_functionForMode_C[QPainter::CompositionMode_Xor] = comp_func_XOR_mips_dsp;
7022 qt_functionForMode_C[QPainter::CompositionMode_SourceOut] = comp_func_SourceOut_mips_dsp;
7023
7024 qt_functionForModeSolid_C[QPainter::CompositionMode_SourceOver] = comp_func_solid_SourceOver_mips_dsp;
7025 qt_functionForModeSolid_C[QPainter::CompositionMode_DestinationOver] = comp_func_solid_DestinationOver_mips_dsp;
7026 qt_functionForModeSolid_C[QPainter::CompositionMode_SourceIn] = comp_func_solid_SourceIn_mips_dsp;
7027 qt_functionForModeSolid_C[QPainter::CompositionMode_DestinationIn] = comp_func_solid_DestinationIn_mips_dsp;
7028 qt_functionForModeSolid_C[QPainter::CompositionMode_SourceAtop] = comp_func_solid_SourceAtop_mips_dsp;
7029 qt_functionForModeSolid_C[QPainter::CompositionMode_DestinationAtop] = comp_func_solid_DestinationAtop_mips_dsp;
7030 qt_functionForModeSolid_C[QPainter::CompositionMode_Xor] = comp_func_solid_XOR_mips_dsp;
7031 qt_functionForModeSolid_C[QPainter::CompositionMode_SourceOut] = comp_func_solid_SourceOut_mips_dsp;
7032
7033 qBlendFunctions[QImage::Format_RGB32][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_mips_dsp;
7034 qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_mips_dsp;
7035 qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_mips_dsp;
7036 qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_mips_dsp;
7037
7038 destFetchProc[QImage::Format_ARGB32] = qt_destFetchARGB32_mips_dsp;
7039
7040 destStoreProc[QImage::Format_ARGB32] = qt_destStoreARGB32_mips_dsp;
7041
7042 sourceFetchUntransformed[QImage::Format_RGB888] = qt_fetchUntransformed_888_mips_dsp;
7043 sourceFetchUntransformed[QImage::Format_RGB444] = qt_fetchUntransformed_444_mips_dsp;
7044 sourceFetchUntransformed[QImage::Format_ARGB8565_Premultiplied] = qt_fetchUntransformed_argb8565_premultiplied_mips_dsp;
7045
7046 #if defined(__MIPS_DSPR2__)
7047 qBlendFunctions[QImage::Format_RGB16][QImage::Format_RGB16] = qt_blend_rgb16_on_rgb16_mips_dspr2;
7048 sourceFetchUntransformed[QImage::Format_RGB16] = qt_fetchUntransformedRGB16_mips_dspr2;
7049 #else
7050 qBlendFunctions[QImage::Format_RGB16][QImage::Format_RGB16] = qt_blend_rgb16_on_rgb16_mips_dsp;
7051 #endif // defined(__MIPS_DSPR2__)
7052 #endif // defined(__MIPS_DSP__)
7053 }
7054
7055 // Register qInitDrawhelperFunctions() as a constructor function so that the initialization it performs is ensured whenever this object file is linked.
7056 Q_CONSTRUCTOR_FUNCTION(qInitDrawhelperFunctions);
7057
7058 QT_END_NAMESPACE
7059