1 /****************************************************************************
2 **
3 ** Copyright (C) 2016 The Qt Company Ltd.
4 ** Contact: https://www.qt.io/licensing/
5 **
6 ** This file is part of the QtGui module of the Qt Toolkit.
7 **
8 ** $QT_BEGIN_LICENSE:LGPL$
9 ** Commercial License Usage
10 ** Licensees holding valid commercial Qt licenses may use this file in
11 ** accordance with the commercial license agreement provided with the
12 ** Software or, alternatively, in accordance with the terms contained in
13 ** a written agreement between you and The Qt Company. For licensing terms
14 ** and conditions see https://www.qt.io/terms-conditions. For further
15 ** information use the contact form at https://www.qt.io/contact-us.
16 **
17 ** GNU Lesser General Public License Usage
18 ** Alternatively, this file may be used under the terms of the GNU Lesser
19 ** General Public License version 3 as published by the Free Software
20 ** Foundation and appearing in the file LICENSE.LGPL3 included in the
21 ** packaging of this file. Please review the following information to
22 ** ensure the GNU Lesser General Public License version 3 requirements
23 ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
24 **
25 ** GNU General Public License Usage
26 ** Alternatively, this file may be used under the terms of the GNU
27 ** General Public License version 2.0 or (at your option) the GNU General
28 ** Public license version 3 or any later version approved by the KDE Free
29 ** Qt Foundation. The licenses are as published by the Free Software
30 ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
31 ** included in the packaging of this file. Please review the following
32 ** information to ensure the GNU General Public License requirements will
33 ** be met: https://www.gnu.org/licenses/gpl-2.0.html and
34 ** https://www.gnu.org/licenses/gpl-3.0.html.
35 **
36 ** $QT_END_LICENSE$
37 **
38 ****************************************************************************/
39 
40 #include "qimagescale_p.h"
41 #include "qimage.h"
42 #include <private/qsimd_p.h>
43 
44 #if QT_CONFIG(thread) && !defined(Q_OS_WASM)
45 #include "qsemaphore.h"
46 #include "qthreadpool.h"
47 #endif
48 
49 #if defined(__ARM_NEON__)
50 
51 QT_BEGIN_NAMESPACE
52 
53 using namespace QImageScale;
54 
55 template<typename T>
multithread_pixels_function(QImageScaleInfo * isi,int dh,const T & scaleSection)56 static inline void multithread_pixels_function(QImageScaleInfo *isi, int dh, const T &scaleSection)
57 {
58 #if QT_CONFIG(thread) && !defined(Q_OS_WASM)
59     int segments = (qsizetype(isi->sh) * isi->sw) / (1<<16);
60     segments = std::min(segments, dh);
61     QThreadPool *threadPool = QThreadPool::globalInstance();
62     if (segments > 1 && threadPool && !threadPool->contains(QThread::currentThread())) {
63         QSemaphore semaphore;
64         int y = 0;
65         for (int i = 0; i < segments; ++i) {
66             int yn = (dh - y) / (segments - i);
67             threadPool->start([&, y, yn]() {
68                 scaleSection(y, y + yn);
69                 semaphore.release(1);
70             });
71             y += yn;
72         }
73         semaphore.acquire(segments);
74         return;
75     }
76 #endif
77     scaleSection(0, dh);
78 }
79 
qt_qimageScaleAARGBA_helper(const unsigned int * pix,int xyap,int Cxy,int step)80 inline static uint32x4_t qt_qimageScaleAARGBA_helper(const unsigned int *pix, int xyap, int Cxy, int step)
81 {
82     uint32x2_t vpix32 = vmov_n_u32(*pix);
83     uint16x4_t vpix16 = vget_low_u16(vmovl_u8(vreinterpret_u8_u32(vpix32)));
84     uint32x4_t vx = vmull_n_u16(vpix16, xyap);
85     int i;
86     for (i = (1 << 14) - xyap; i > Cxy; i -= Cxy) {
87         pix += step;
88         vpix32 = vmov_n_u32(*pix);
89         vpix16 = vget_low_u16(vmovl_u8(vreinterpret_u8_u32(vpix32)));
90         vx = vaddq_u32(vx, vmull_n_u16(vpix16, Cxy));
91     }
92     pix += step;
93     vpix32 = vmov_n_u32(*pix);
94     vpix16 = vget_low_u16(vmovl_u8(vreinterpret_u8_u32(vpix32)));
95     vx = vaddq_u32(vx, vmull_n_u16(vpix16, i));
96     return vx;
97 }
98 
99 template<bool RGB>
qt_qimageScaleAARGBA_up_x_down_y_neon(QImageScaleInfo * isi,unsigned int * dest,int dw,int dh,int dow,int sow)100 void qt_qimageScaleAARGBA_up_x_down_y_neon(QImageScaleInfo *isi, unsigned int *dest,
101                                            int dw, int dh, int dow, int sow)
102 {
103     const unsigned int **ypoints = isi->ypoints;
104     int *xpoints = isi->xpoints;
105     int *xapoints = isi->xapoints;
106     int *yapoints = isi->yapoints;
107 
108     /* go through every scanline in the output buffer */
109     auto scaleSection = [&] (int yStart, int yEnd) {
110         for (int y = yStart; y < yEnd; ++y) {
111             int Cy = yapoints[y] >> 16;
112             int yap = yapoints[y] & 0xffff;
113 
114             unsigned int *dptr = dest + (y * dow);
115             for (int x = 0; x < dw; x++) {
116                 const unsigned int *sptr = ypoints[y] + xpoints[x];
117                 uint32x4_t vx = qt_qimageScaleAARGBA_helper(sptr, yap, Cy, sow);
118 
119                 int xap = xapoints[x];
120                 if (xap > 0) {
121                     uint32x4_t vr = qt_qimageScaleAARGBA_helper(sptr + 1, yap, Cy, sow);
122 
123                     vx = vmulq_n_u32(vx, 256 - xap);
124                     vr = vmulq_n_u32(vr, xap);
125                     vx = vaddq_u32(vx, vr);
126                     vx = vshrq_n_u32(vx, 8);
127                 }
128                 vx = vshrq_n_u32(vx, 14);
129                 const uint16x4_t vx16 = vmovn_u32(vx);
130                 const uint8x8_t vx8 = vmovn_u16(vcombine_u16(vx16, vx16));
131                 *dptr = vget_lane_u32(vreinterpret_u32_u8(vx8), 0);
132                 if (RGB)
133                     *dptr |= 0xff000000;
134                 dptr++;
135             }
136         }
137     };
138     multithread_pixels_function(isi, dh, scaleSection);
139 }
140 
141 template<bool RGB>
qt_qimageScaleAARGBA_down_x_up_y_neon(QImageScaleInfo * isi,unsigned int * dest,int dw,int dh,int dow,int sow)142 void qt_qimageScaleAARGBA_down_x_up_y_neon(QImageScaleInfo *isi, unsigned int *dest,
143                                            int dw, int dh, int dow, int sow)
144 {
145     const unsigned int **ypoints = isi->ypoints;
146     int *xpoints = isi->xpoints;
147     int *xapoints = isi->xapoints;
148     int *yapoints = isi->yapoints;
149 
150     /* go through every scanline in the output buffer */
151     auto scaleSection = [&] (int yStart, int yEnd) {
152         for (int y = yStart; y < yEnd; ++y) {
153             unsigned int *dptr = dest + (y * dow);
154             for (int x = 0; x < dw; x++) {
155                 int Cx = xapoints[x] >> 16;
156                 int xap = xapoints[x] & 0xffff;
157 
158                 const unsigned int *sptr = ypoints[y] + xpoints[x];
159                 uint32x4_t vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1);
160 
161                 int yap = yapoints[y];
162                 if (yap > 0) {
163                     uint32x4_t vr = qt_qimageScaleAARGBA_helper(sptr + sow, xap, Cx, 1);
164 
165                     vx = vmulq_n_u32(vx, 256 - yap);
166                     vr = vmulq_n_u32(vr, yap);
167                     vx = vaddq_u32(vx, vr);
168                     vx = vshrq_n_u32(vx, 8);
169                 }
170                 vx = vshrq_n_u32(vx, 14);
171                 const uint16x4_t vx16 = vmovn_u32(vx);
172                 const uint8x8_t vx8 = vmovn_u16(vcombine_u16(vx16, vx16));
173                 *dptr = vget_lane_u32(vreinterpret_u32_u8(vx8), 0);
174                 if (RGB)
175                     *dptr |= 0xff000000;
176                 dptr++;
177             }
178         }
179     };
180     multithread_pixels_function(isi, dh, scaleSection);
181 }
182 
183 template<bool RGB>
qt_qimageScaleAARGBA_down_xy_neon(QImageScaleInfo * isi,unsigned int * dest,int dw,int dh,int dow,int sow)184 void qt_qimageScaleAARGBA_down_xy_neon(QImageScaleInfo *isi, unsigned int *dest,
185                                        int dw, int dh, int dow, int sow)
186 {
187     const unsigned int **ypoints = isi->ypoints;
188     int *xpoints = isi->xpoints;
189     int *xapoints = isi->xapoints;
190     int *yapoints = isi->yapoints;
191 
192     auto scaleSection = [&] (int yStart, int yEnd) {
193         for (int y = yStart; y < yEnd; ++y) {
194             int Cy = yapoints[y] >> 16;
195             int yap = yapoints[y] & 0xffff;
196 
197             unsigned int *dptr = dest + (y * dow);
198             for (int x = 0; x < dw; x++) {
199                 const int Cx = xapoints[x] >> 16;
200                 const int xap = xapoints[x] & 0xffff;
201 
202                 const unsigned int *sptr = ypoints[y] + xpoints[x];
203                 uint32x4_t vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1);
204                 vx = vshrq_n_u32(vx, 4);
205                 uint32x4_t vr = vmulq_n_u32(vx, yap);
206 
207                 int j;
208                 for (j = (1 << 14) - yap; j > Cy; j -= Cy) {
209                     sptr += sow;
210                     vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1);
211                     vx = vshrq_n_u32(vx, 4);
212                     vx = vmulq_n_u32(vx, Cy);
213                     vr = vaddq_u32(vr, vx);
214                 }
215                 sptr += sow;
216                 vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1);
217                 vx = vshrq_n_u32(vx, 4);
218                 vx = vmulq_n_u32(vx, j);
219                 vr = vaddq_u32(vr, vx);
220 
221                 vx = vshrq_n_u32(vr, 24);
222                 const uint16x4_t vx16 = vmovn_u32(vx);
223                 const uint8x8_t vx8 = vmovn_u16(vcombine_u16(vx16, vx16));
224                 *dptr = vget_lane_u32(vreinterpret_u32_u8(vx8), 0);
225                 if (RGB)
226                     *dptr |= 0xff000000;
227                 dptr++;
228             }
229         }
230     };
231     multithread_pixels_function(isi, dh, scaleSection);
232 }
233 
234 template void qt_qimageScaleAARGBA_up_x_down_y_neon<false>(QImageScaleInfo *isi, unsigned int *dest,
235                                                            int dw, int dh, int dow, int sow);
236 
237 template void qt_qimageScaleAARGBA_up_x_down_y_neon<true>(QImageScaleInfo *isi, unsigned int *dest,
238                                                           int dw, int dh, int dow, int sow);
239 
240 template void qt_qimageScaleAARGBA_down_x_up_y_neon<false>(QImageScaleInfo *isi, unsigned int *dest,
241                                                            int dw, int dh, int dow, int sow);
242 
243 template void qt_qimageScaleAARGBA_down_x_up_y_neon<true>(QImageScaleInfo *isi, unsigned int *dest,
244                                                           int dw, int dh, int dow, int sow);
245 
246 template void qt_qimageScaleAARGBA_down_xy_neon<false>(QImageScaleInfo *isi, unsigned int *dest,
247                                                        int dw, int dh, int dow, int sow);
248 
249 template void qt_qimageScaleAARGBA_down_xy_neon<true>(QImageScaleInfo *isi, unsigned int *dest,
250                                                       int dw, int dh, int dow, int sow);
251 
252 QT_END_NAMESPACE
253 
254 #endif
255