1 /****************************************************************************
2 **
3 ** Copyright (C) 2016 The Qt Company Ltd.
4 ** Contact: https://www.qt.io/licensing/
5 **
6 ** This file is part of the QtGui module of the Qt Toolkit.
7 **
8 ** $QT_BEGIN_LICENSE:LGPL$
9 ** Commercial License Usage
10 ** Licensees holding valid commercial Qt licenses may use this file in
11 ** accordance with the commercial license agreement provided with the
12 ** Software or, alternatively, in accordance with the terms contained in
13 ** a written agreement between you and The Qt Company. For licensing terms
14 ** and conditions see https://www.qt.io/terms-conditions. For further
15 ** information use the contact form at https://www.qt.io/contact-us.
16 **
17 ** GNU Lesser General Public License Usage
18 ** Alternatively, this file may be used under the terms of the GNU Lesser
19 ** General Public License version 3 as published by the Free Software
20 ** Foundation and appearing in the file LICENSE.LGPL3 included in the
21 ** packaging of this file. Please review the following information to
22 ** ensure the GNU Lesser General Public License version 3 requirements
23 ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
24 **
25 ** GNU General Public License Usage
26 ** Alternatively, this file may be used under the terms of the GNU
27 ** General Public License version 2.0 or (at your option) the GNU General
28 ** Public license version 3 or any later version approved by the KDE Free
29 ** Qt Foundation. The licenses are as published by the Free Software
30 ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
31 ** included in the packaging of this file. Please review the following
32 ** information to ensure the GNU General Public License requirements will
33 ** be met: https://www.gnu.org/licenses/gpl-2.0.html and
34 ** https://www.gnu.org/licenses/gpl-3.0.html.
35 **
36 ** $QT_END_LICENSE$
37 **
38 ****************************************************************************/
39 
#include "private/qmemrotate_p.h"

#include <cstddef>
41 
42 QT_BEGIN_NAMESPACE
43 
// Edge length, in pixels, of the square tiles the tiled rotation loops below iterate over.
static constexpr int tileSize = 32;
45 
46 template <class T>
47 static
qt_memrotate90_tiled(const T * src,int w,int h,int sstride,T * dest,int dstride)48 inline void qt_memrotate90_tiled(const T *src, int w, int h, int sstride, T *dest, int dstride)
49 {
50     sstride /= sizeof(T);
51     dstride /= sizeof(T);
52 
53     const int pack = sizeof(quint32) / sizeof(T);
54     const int unaligned =
55         qMin(uint((quintptr(dest) & (sizeof(quint32)-1)) / sizeof(T)), uint(h));
56     const int restX = w % tileSize;
57     const int restY = (h - unaligned) % tileSize;
58     const int unoptimizedY = restY % pack;
59     const int numTilesX = w / tileSize + (restX > 0);
60     const int numTilesY = (h - unaligned) / tileSize + (restY >= pack);
61 
62     for (int tx = 0; tx < numTilesX; ++tx) {
63         const int startx = w - tx * tileSize - 1;
64         const int stopx = qMax(startx - tileSize, 0);
65 
66         if (unaligned) {
67             for (int x = startx; x >= stopx; --x) {
68                 T *d = dest + (w - x - 1) * dstride;
69                 for (int y = 0; y < unaligned; ++y) {
70                     *d++ = src[y * sstride + x];
71                 }
72             }
73         }
74 
75         for (int ty = 0; ty < numTilesY; ++ty) {
76             const int starty = ty * tileSize + unaligned;
77             const int stopy = qMin(starty + tileSize, h - unoptimizedY);
78 
79             for (int x = startx; x >= stopx; --x) {
80                 quint32 *d = reinterpret_cast<quint32*>(dest + (w - x - 1) * dstride + starty);
81                 for (int y = starty; y < stopy; y += pack) {
82                     quint32 c = src[y * sstride + x];
83                     for (int i = 1; i < pack; ++i) {
84                         const int shift = (sizeof(T) * 8 * i);
85                         const T color = src[(y + i) * sstride + x];
86                         c |= color << shift;
87                     }
88                     *d++ = c;
89                 }
90             }
91         }
92 
93         if (unoptimizedY) {
94             const int starty = h - unoptimizedY;
95             for (int x = startx; x >= stopx; --x) {
96                 T *d = dest + (w - x - 1) * dstride + starty;
97                 for (int y = starty; y < h; ++y) {
98                     *d++ = src[y * sstride + x];
99                 }
100             }
101         }
102     }
103 }
104 
105 template <class T>
106 static
qt_memrotate90_tiled_unpacked(const T * src,int w,int h,int sstride,T * dest,int dstride)107 inline void qt_memrotate90_tiled_unpacked(const T *src, int w, int h, int sstride, T *dest,
108                                           int dstride)
109 {
110     const int numTilesX = (w + tileSize - 1) / tileSize;
111     const int numTilesY = (h + tileSize - 1) / tileSize;
112 
113     for (int tx = 0; tx < numTilesX; ++tx) {
114         const int startx = w - tx * tileSize - 1;
115         const int stopx = qMax(startx - tileSize, 0);
116 
117         for (int ty = 0; ty < numTilesY; ++ty) {
118             const int starty = ty * tileSize;
119             const int stopy = qMin(starty + tileSize, h);
120 
121             for (int x = startx; x >= stopx; --x) {
122                 T *d = (T *)((char*)dest + (w - x - 1) * dstride) + starty;
123                 const char *s = (const char*)(src + x) + starty * sstride;
124                 for (int y = starty; y < stopy; ++y) {
125                     *d++ = *(const T *)(s);
126                     s += sstride;
127                 }
128             }
129         }
130     }
131 }
132 
133 template <class T>
134 static
qt_memrotate270_tiled(const T * src,int w,int h,int sstride,T * dest,int dstride)135 inline void qt_memrotate270_tiled(const T *src, int w, int h, int sstride, T *dest, int dstride)
136 {
137     sstride /= sizeof(T);
138     dstride /= sizeof(T);
139 
140     const int pack = sizeof(quint32) / sizeof(T);
141     const int unaligned =
142         qMin(uint((quintptr(dest) & (sizeof(quint32)-1)) / sizeof(T)), uint(h));
143     const int restX = w % tileSize;
144     const int restY = (h - unaligned) % tileSize;
145     const int unoptimizedY = restY % pack;
146     const int numTilesX = w / tileSize + (restX > 0);
147     const int numTilesY = (h - unaligned) / tileSize + (restY >= pack);
148 
149     for (int tx = 0; tx < numTilesX; ++tx) {
150         const int startx = tx * tileSize;
151         const int stopx = qMin(startx + tileSize, w);
152 
153         if (unaligned) {
154             for (int x = startx; x < stopx; ++x) {
155                 T *d = dest + x * dstride;
156                 for (int y = h - 1; y >= h - unaligned; --y) {
157                     *d++ = src[y * sstride + x];
158                 }
159             }
160         }
161 
162         for (int ty = 0; ty < numTilesY; ++ty) {
163             const int starty = h - 1 - unaligned - ty * tileSize;
164             const int stopy = qMax(starty - tileSize, unoptimizedY);
165 
166             for (int x = startx; x < stopx; ++x) {
167                 quint32 *d = reinterpret_cast<quint32*>(dest + x * dstride
168                                                         + h - 1 - starty);
169                 for (int y = starty; y >= stopy; y -= pack) {
170                     quint32 c = src[y * sstride + x];
171                     for (int i = 1; i < pack; ++i) {
172                         const int shift = (sizeof(T) * 8 * i);
173                         const T color = src[(y - i) * sstride + x];
174                         c |= color << shift;
175                     }
176                     *d++ = c;
177                 }
178             }
179         }
180         if (unoptimizedY) {
181             const int starty = unoptimizedY - 1;
182             for (int x = startx; x < stopx; ++x) {
183                 T *d = dest + x * dstride + h - 1 - starty;
184                 for (int y = starty; y >= 0; --y) {
185                     *d++ = src[y * sstride + x];
186                 }
187             }
188         }
189     }
190 }
191 
192 template <class T>
193 static
qt_memrotate270_tiled_unpacked(const T * src,int w,int h,int sstride,T * dest,int dstride)194 inline void qt_memrotate270_tiled_unpacked(const T *src, int w, int h, int sstride, T *dest,
195                                            int dstride)
196 {
197     const int numTilesX = (w + tileSize - 1) / tileSize;
198     const int numTilesY = (h + tileSize - 1) / tileSize;
199 
200     for (int tx = 0; tx < numTilesX; ++tx) {
201         const int startx = tx * tileSize;
202         const int stopx = qMin(startx + tileSize, w);
203 
204         for (int ty = 0; ty < numTilesY; ++ty) {
205             const int starty = h - 1 - ty * tileSize;
206             const int stopy = qMax(starty - tileSize, 0);
207 
208             for (int x = startx; x < stopx; ++x) {
209                 T *d = (T*)((char*)dest + x * dstride) + h - 1 - starty;
210                 const char *s = (const char*)(src + x) + starty * sstride;
211                 for (int y = starty; y >= stopy; --y) {
212                     *d++ = *(const T*)s;
213                     s -= sstride;
214                 }
215             }
216         }
217     }
218 }
219 
220 
221 template <class T>
222 static
qt_memrotate90_template(const T * src,int srcWidth,int srcHeight,int srcStride,T * dest,int dstStride)223 inline void qt_memrotate90_template(const T *src, int srcWidth, int srcHeight, int srcStride,
224                                     T *dest, int dstStride)
225 {
226 #if Q_BYTE_ORDER == Q_LITTLE_ENDIAN
227     // packed algorithm assumes little endian and that sizeof(quint32)/sizeof(T) is an integer
228     if (sizeof(quint32) % sizeof(T) == 0)
229         qt_memrotate90_tiled<T>(src, srcWidth, srcHeight, srcStride, dest, dstStride);
230     else
231 #endif
232     qt_memrotate90_tiled_unpacked<T>(src, srcWidth, srcHeight, srcStride, dest, dstStride);
233 }
234 
235 template <>
qt_memrotate90_template(const quint32 * src,int w,int h,int sstride,quint32 * dest,int dstride)236 inline void qt_memrotate90_template<quint32>(const quint32 *src, int w, int h, int sstride, quint32 *dest, int dstride)
237 {
238     // packed algorithm doesn't have any benefit for quint32
239     qt_memrotate90_tiled_unpacked(src, w, h, sstride, dest, dstride);
240 }
241 
242 template <>
qt_memrotate90_template(const quint64 * src,int w,int h,int sstride,quint64 * dest,int dstride)243 inline void qt_memrotate90_template<quint64>(const quint64 *src, int w, int h, int sstride, quint64 *dest, int dstride)
244 {
245     qt_memrotate90_tiled_unpacked(src, w, h, sstride, dest, dstride);
246 }
247 
// Rotates by 180 degrees: destination row dy, column dx receives source row (h - 1 - dy),
// column (w - 1 - dx).
//
// src/dest  first pixel of the source / destination buffer
// w, h      width and height in pixels (identical for source and destination)
// sstride   source stride in bytes
// dstride   destination stride in bytes
//
// Does nothing when w or h is not positive.
template <class T>
static inline void qt_memrotate180_template(const T *src, int w, int h, int sstride, T *dest, int dstride)
{
    if (w <= 0 || h <= 0)
        return;

    // Widen the byte strides so that row offsets cannot overflow int on very large images.
    const std::ptrdiff_t srcStride = sstride;
    const std::ptrdiff_t dstStride = dstride;
    const char *srcBytes = reinterpret_cast<const char *>(src);
    char *dstBytes = reinterpret_cast<char *>(dest);

    for (int dy = 0; dy < h; ++dy) {
        // Destination rows are filled top to bottom from source rows taken bottom to top.
        const T *s = reinterpret_cast<const T *>(srcBytes + (h - 1 - dy) * srcStride);
        T *d = reinterpret_cast<T *>(dstBytes + dy * dstStride);
        for (int dx = 0; dx < w; ++dx)
            d[dx] = s[w - 1 - dx];
    }
}
262 
263 template <class T>
264 static
qt_memrotate270_template(const T * src,int srcWidth,int srcHeight,int srcStride,T * dest,int dstStride)265 inline void qt_memrotate270_template(const T *src, int srcWidth, int srcHeight, int srcStride,
266                                      T *dest, int dstStride)
267 {
268 #if Q_BYTE_ORDER == Q_LITTLE_ENDIAN
269     // packed algorithm assumes little endian and that sizeof(quint32)/sizeof(T) is an integer
270     if (sizeof(quint32) % sizeof(T) == 0)
271         qt_memrotate270_tiled<T>(src, srcWidth, srcHeight, srcStride, dest, dstStride);
272     else
273 #endif
274     qt_memrotate270_tiled_unpacked<T>(src, srcWidth, srcHeight, srcStride, dest, dstStride);
275 }
276 
277 template <>
qt_memrotate270_template(const quint32 * src,int w,int h,int sstride,quint32 * dest,int dstride)278 inline void qt_memrotate270_template<quint32>(const quint32 *src, int w, int h, int sstride, quint32 *dest, int dstride)
279 {
280     // packed algorithm doesn't have any benefit for quint32
281     qt_memrotate270_tiled_unpacked(src, w, h, sstride, dest, dstride);
282 }
283 
284 template <>
qt_memrotate270_template(const quint64 * src,int w,int h,int sstride,quint64 * dest,int dstride)285 inline void qt_memrotate270_template<quint64>(const quint64 *src, int w, int h, int sstride, quint64 *dest, int dstride)
286 {
287     qt_memrotate270_tiled_unpacked(src, w, h, sstride, dest, dstride);
288 }
289 
// Defines the exported qt_memrotate90 / qt_memrotate180 / qt_memrotate270 overloads for
// pixel type `type`; each one forwards to the matching qt_memrotate*_template.
#define QT_IMPL_MEMROTATE(type) \
    Q_GUI_EXPORT void qt_memrotate90(const type *src, int w, int h, int sstride, \
                                     type *dest, int dstride) \
    { \
        qt_memrotate90_template(src, w, h, sstride, dest, dstride); \
    } \
    Q_GUI_EXPORT void qt_memrotate180(const type *src, int w, int h, int sstride, \
                                      type *dest, int dstride) \
    { \
        qt_memrotate180_template(src, w, h, sstride, dest, dstride); \
    } \
    Q_GUI_EXPORT void qt_memrotate270(const type *src, int w, int h, int sstride, \
                                      type *dest, int dstride) \
    { \
        qt_memrotate270_template(src, w, h, sstride, dest, dstride); \
    }
306 
// Like QT_IMPL_MEMROTATE, but the 90 and 270 overloads call the unpacked tiled
// implementation directly instead of going through the dispatching templates.
#define QT_IMPL_SIMPLE_MEMROTATE(type) \
    Q_GUI_EXPORT void qt_memrotate90(const type *src, int w, int h, int sstride, \
                                     type *dest, int dstride) \
    { \
        qt_memrotate90_tiled_unpacked(src, w, h, sstride, dest, dstride); \
    } \
    Q_GUI_EXPORT void qt_memrotate180(const type *src, int w, int h, int sstride, \
                                      type *dest, int dstride) \
    { \
        qt_memrotate180_template(src, w, h, sstride, dest, dstride); \
    } \
    Q_GUI_EXPORT void qt_memrotate270(const type *src, int w, int h, int sstride, \
                                      type *dest, int dstride) \
    { \
        qt_memrotate270_tiled_unpacked(src, w, h, sstride, dest, dstride); \
    }
323 
324 QT_IMPL_MEMROTATE(quint64)
QT_IMPL_MEMROTATE(quint32)325 QT_IMPL_MEMROTATE(quint32)
326 QT_IMPL_MEMROTATE(quint16)
327 QT_IMPL_MEMROTATE(quint24)
328 QT_IMPL_MEMROTATE(quint8)
329 
330 void qt_memrotate90_8(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl)
331 {
332     qt_memrotate90(srcPixels, w, h, sbpl, destPixels, dbpl);
333 }
334 
qt_memrotate180_8(const uchar * srcPixels,int w,int h,int sbpl,uchar * destPixels,int dbpl)335 void qt_memrotate180_8(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl)
336 {
337     qt_memrotate180(srcPixels, w, h, sbpl, destPixels, dbpl);
338 }
339 
qt_memrotate270_8(const uchar * srcPixels,int w,int h,int sbpl,uchar * destPixels,int dbpl)340 void qt_memrotate270_8(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl)
341 {
342     qt_memrotate270(srcPixels, w, h, sbpl, destPixels, dbpl);
343 }
344 
qt_memrotate90_16(const uchar * srcPixels,int w,int h,int sbpl,uchar * destPixels,int dbpl)345 void qt_memrotate90_16(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl)
346 {
347     qt_memrotate90((const ushort *)srcPixels, w, h, sbpl, (ushort *)destPixels, dbpl);
348 }
349 
qt_memrotate180_16(const uchar * srcPixels,int w,int h,int sbpl,uchar * destPixels,int dbpl)350 void qt_memrotate180_16(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl)
351 {
352     qt_memrotate180((const ushort *)srcPixels, w, h, sbpl, (ushort *)destPixels, dbpl);
353 }
354 
qt_memrotate270_16(const uchar * srcPixels,int w,int h,int sbpl,uchar * destPixels,int dbpl)355 void qt_memrotate270_16(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl)
356 {
357     qt_memrotate270((const ushort *)srcPixels, w, h, sbpl, (ushort *)destPixels, dbpl);
358 }
359 
qt_memrotate90_24(const uchar * srcPixels,int w,int h,int sbpl,uchar * destPixels,int dbpl)360 void qt_memrotate90_24(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl)
361 {
362     qt_memrotate90((const quint24 *)srcPixels, w, h, sbpl, (quint24 *)destPixels, dbpl);
363 }
364 
qt_memrotate180_24(const uchar * srcPixels,int w,int h,int sbpl,uchar * destPixels,int dbpl)365 void qt_memrotate180_24(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl)
366 {
367     qt_memrotate180((const quint24 *)srcPixels, w, h, sbpl, (quint24 *)destPixels, dbpl);
368 }
369 
qt_memrotate270_24(const uchar * srcPixels,int w,int h,int sbpl,uchar * destPixels,int dbpl)370 void qt_memrotate270_24(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl)
371 {
372     qt_memrotate270((const quint24 *)srcPixels, w, h, sbpl, (quint24 *)destPixels, dbpl);
373 }
374 
qt_memrotate90_32(const uchar * srcPixels,int w,int h,int sbpl,uchar * destPixels,int dbpl)375 void qt_memrotate90_32(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl)
376 {
377     qt_memrotate90((const uint *)srcPixels, w, h, sbpl, (uint *)destPixels, dbpl);
378 }
379 
qt_memrotate180_32(const uchar * srcPixels,int w,int h,int sbpl,uchar * destPixels,int dbpl)380 void qt_memrotate180_32(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl)
381 {
382     qt_memrotate180((const uint *)srcPixels, w, h, sbpl, (uint *)destPixels, dbpl);
383 }
384 
qt_memrotate270_32(const uchar * srcPixels,int w,int h,int sbpl,uchar * destPixels,int dbpl)385 void qt_memrotate270_32(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl)
386 {
387     qt_memrotate270((const uint *)srcPixels, w, h, sbpl, (uint *)destPixels, dbpl);
388 }
389 
390 
qt_memrotate90_64(const uchar * srcPixels,int w,int h,int sbpl,uchar * destPixels,int dbpl)391 void qt_memrotate90_64(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl)
392 {
393     qt_memrotate90((const quint64 *)srcPixels, w, h, sbpl, (quint64 *)destPixels, dbpl);
394 }
395 
qt_memrotate180_64(const uchar * srcPixels,int w,int h,int sbpl,uchar * destPixels,int dbpl)396 void qt_memrotate180_64(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl)
397 {
398     qt_memrotate180((const quint64 *)srcPixels, w, h, sbpl, (quint64 *)destPixels, dbpl);
399 }
400 
qt_memrotate270_64(const uchar * srcPixels,int w,int h,int sbpl,uchar * destPixels,int dbpl)401 void qt_memrotate270_64(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl)
402 {
403     qt_memrotate270((const quint64 *)srcPixels, w, h, sbpl, (quint64 *)destPixels, dbpl);
404 }
405 
406 MemRotateFunc qMemRotateFunctions[QPixelLayout::BPPCount][3] =
407 // 90, 180, 270
408 {
409     { nullptr, nullptr, nullptr },      // BPPNone,
410     { nullptr, nullptr, nullptr },      // BPP1MSB,
411     { nullptr, nullptr, nullptr },      // BPP1LSB,
412     { qt_memrotate90_8, qt_memrotate180_8, qt_memrotate270_8 },         // BPP8,
413     { qt_memrotate90_16, qt_memrotate180_16, qt_memrotate270_16 },      // BPP16,
414     { qt_memrotate90_24, qt_memrotate180_24, qt_memrotate270_24 },      // BPP24
415     { qt_memrotate90_32, qt_memrotate180_32, qt_memrotate270_32 },      // BPP32
416     { qt_memrotate90_64, qt_memrotate180_64, qt_memrotate270_64 },      // BPP64
417 };
418 
419 QT_END_NAMESPACE
420