1 /****************************************************************************
2 **
3 ** Copyright (C) 2016 The Qt Company Ltd.
4 ** Contact: https://www.qt.io/licensing/
5 **
6 ** This file is part of the QtGui module of the Qt Toolkit.
7 **
8 ** $QT_BEGIN_LICENSE:LGPL$
9 ** Commercial License Usage
10 ** Licensees holding valid commercial Qt licenses may use this file in
11 ** accordance with the commercial license agreement provided with the
12 ** Software or, alternatively, in accordance with the terms contained in
13 ** a written agreement between you and The Qt Company. For licensing terms
14 ** and conditions see https://www.qt.io/terms-conditions. For further
15 ** information use the contact form at https://www.qt.io/contact-us.
16 **
17 ** GNU Lesser General Public License Usage
18 ** Alternatively, this file may be used under the terms of the GNU Lesser
19 ** General Public License version 3 as published by the Free Software
20 ** Foundation and appearing in the file LICENSE.LGPL3 included in the
21 ** packaging of this file. Please review the following information to
22 ** ensure the GNU Lesser General Public License version 3 requirements
23 ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
24 **
25 ** GNU General Public License Usage
26 ** Alternatively, this file may be used under the terms of the GNU
27 ** General Public License version 2.0 or (at your option) the GNU General
28 ** Public license version 3 or any later version approved by the KDE Free
29 ** Qt Foundation. The licenses are as published by the Free Software
30 ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
31 ** included in the packaging of this file. Please review the following
32 ** information to ensure the GNU General Public License requirements will
33 ** be met: https://www.gnu.org/licenses/gpl-2.0.html and
34 ** https://www.gnu.org/licenses/gpl-3.0.html.
35 **
36 ** $QT_END_LICENSE$
37 **
38 ****************************************************************************/
39 
40 #include <qimage.h>
41 #include <private/qimage_p.h>
42 #include <private/qsimd_p.h>
43 
44 #ifdef QT_COMPILER_SUPPORTS_SSSE3
45 
46 QT_BEGIN_NAMESPACE
47 
48 // Convert a scanline of RGB888 (src) to RGB32 (dst)
49 // src must be at least len * 3 bytes
50 // dst must be at least len * 4 bytes
qt_convert_rgb888_to_rgb32_ssse3(quint32 * dst,const uchar * src,int len)51 Q_GUI_EXPORT void QT_FASTCALL qt_convert_rgb888_to_rgb32_ssse3(quint32 *dst, const uchar *src, int len)
52 {
53     int i = 0;
54 
55     // Prologue, align dst to 16 bytes.
56     ALIGNMENT_PROLOGUE_16BYTES(dst, i, len) {
57         dst[i] = qRgb(src[0], src[1], src[2]);
58         src += 3;
59     }
60 
61     // Mask the 4 first colors of the RGB888 vector
62     const __m128i shuffleMask = _mm_set_epi8(char(0xff), 9, 10, 11, char(0xff), 6, 7, 8, char(0xff), 3, 4, 5, char(0xff), 0, 1, 2);
63 
64     // Mask the 4 last colors of a RGB888 vector with an offset of 1 (so the last 3 bytes are RGB)
65     const __m128i shuffleMaskEnd = _mm_set_epi8(char(0xff), 13, 14, 15, char(0xff), 10, 11, 12, char(0xff), 7, 8, 9, char(0xff), 4, 5, 6);
66 
67     // Mask to have alpha = 0xff
68     const __m128i alphaMask = _mm_set1_epi32(0xff000000);
69 
70     const __m128i *inVectorPtr = (const __m128i *)src;
71     __m128i *dstVectorPtr = (__m128i *)(dst + i);
72 
73     for (; i < (len - 15); i += 16) { // one iteration in the loop converts 16 pixels
74         /*
75          RGB888 has 5 pixels per vector, + 1 byte from the next pixel. The idea here is
76          to load vectors of RGB888 and use palignr to select a vector out of two vectors.
77 
78          After 3 loads of RGB888 and 3 stores of RGB32, we have 4 pixels left in the last
79          vector of RGB888, we can mask it directly to get a last store or RGB32. After that,
80          the first next byte is a R, and we can loop for the next 16 pixels.
81 
82          The conversion itself is done with a byte permutation (pshufb).
83          */
84         __m128i firstSrcVector = _mm_lddqu_si128(inVectorPtr);
85         __m128i outputVector = _mm_shuffle_epi8(firstSrcVector, shuffleMask);
86         _mm_store_si128(dstVectorPtr, _mm_or_si128(outputVector, alphaMask));
87         ++inVectorPtr;
88         ++dstVectorPtr;
89 
90         // There are 4 unused bytes left in srcVector, we need to load the next 16 bytes
91         // and load the next input with palignr
92         __m128i secondSrcVector = _mm_lddqu_si128(inVectorPtr);
93         __m128i srcVector = _mm_alignr_epi8(secondSrcVector, firstSrcVector, 12);
94         outputVector = _mm_shuffle_epi8(srcVector, shuffleMask);
95         _mm_store_si128(dstVectorPtr, _mm_or_si128(outputVector, alphaMask));
96         ++inVectorPtr;
97         ++dstVectorPtr;
98         firstSrcVector = secondSrcVector;
99 
100         // We now have 8 unused bytes left in firstSrcVector
101         secondSrcVector = _mm_lddqu_si128(inVectorPtr);
102         srcVector = _mm_alignr_epi8(secondSrcVector, firstSrcVector, 8);
103         outputVector = _mm_shuffle_epi8(srcVector, shuffleMask);
104         _mm_store_si128(dstVectorPtr, _mm_or_si128(outputVector, alphaMask));
105         ++inVectorPtr;
106         ++dstVectorPtr;
107 
108         // There are now 12 unused bytes in firstSrcVector.
109         // We can mask them directly, almost there.
110         outputVector = _mm_shuffle_epi8(secondSrcVector, shuffleMaskEnd);
111         _mm_store_si128(dstVectorPtr, _mm_or_si128(outputVector, alphaMask));
112         ++dstVectorPtr;
113     }
114     src = (const uchar *)inVectorPtr;
115 
116     SIMD_EPILOGUE(i, len, 15) {
117         dst[i] = qRgb(src[0], src[1], src[2]);
118         src += 3;
119     }
120 }
121 
convert_RGB888_to_RGB32_ssse3(QImageData * dest,const QImageData * src,Qt::ImageConversionFlags)122 void convert_RGB888_to_RGB32_ssse3(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags)
123 {
124     Q_ASSERT(src->format == QImage::Format_RGB888 || src->format == QImage::Format_BGR888);
125     if (src->format == QImage::Format_BGR888)
126         Q_ASSERT(dest->format == QImage::Format_RGBX8888 || dest->format == QImage::Format_RGBA8888 || dest->format == QImage::Format_RGBA8888_Premultiplied);
127     else
128         Q_ASSERT(dest->format == QImage::Format_RGB32 || dest->format == QImage::Format_ARGB32 || dest->format == QImage::Format_ARGB32_Premultiplied);
129     Q_ASSERT(src->width == dest->width);
130     Q_ASSERT(src->height == dest->height);
131 
132     const uchar *src_data = (uchar *) src->data;
133     quint32 *dest_data = (quint32 *) dest->data;
134 
135     for (int i = 0; i < src->height; ++i) {
136         qt_convert_rgb888_to_rgb32_ssse3(dest_data, src_data, src->width);
137         src_data += src->bytes_per_line;
138         dest_data = (quint32 *)((uchar*)dest_data + dest->bytes_per_line);
139     }
140 }
141 
142 QT_END_NAMESPACE
143 
144 #endif // QT_COMPILER_SUPPORTS_SSSE3
145