1 /****************************************************************************
2 **
3 ** Copyright (C) 2016 The Qt Company Ltd.
4 ** Contact: https://www.qt.io/licensing/
5 **
6 ** This file is part of the QtGui module of the Qt Toolkit.
7 **
8 ** $QT_BEGIN_LICENSE:LGPL$
9 ** Commercial License Usage
10 ** Licensees holding valid commercial Qt licenses may use this file in
11 ** accordance with the commercial license agreement provided with the
12 ** Software or, alternatively, in accordance with the terms contained in
13 ** a written agreement between you and The Qt Company. For licensing terms
14 ** and conditions see https://www.qt.io/terms-conditions. For further
15 ** information use the contact form at https://www.qt.io/contact-us.
16 **
17 ** GNU Lesser General Public License Usage
18 ** Alternatively, this file may be used under the terms of the GNU Lesser
19 ** General Public License version 3 as published by the Free Software
20 ** Foundation and appearing in the file LICENSE.LGPL3 included in the
21 ** packaging of this file. Please review the following information to
22 ** ensure the GNU Lesser General Public License version 3 requirements
23 ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
24 **
25 ** GNU General Public License Usage
26 ** Alternatively, this file may be used under the terms of the GNU
27 ** General Public License version 2.0 or (at your option) the GNU General
28 ** Public license version 3 or any later version approved by the KDE Free
29 ** Qt Foundation. The licenses are as published by the Free Software
30 ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
31 ** included in the packaging of this file. Please review the following
32 ** information to ensure the GNU General Public License requirements will
33 ** be met: https://www.gnu.org/licenses/gpl-2.0.html and
34 ** https://www.gnu.org/licenses/gpl-3.0.html.
35 **
36 ** $QT_END_LICENSE$
37 **
38 ****************************************************************************/
39 
40 #ifndef QCOLORTRCLUT_P_H
41 #define QCOLORTRCLUT_P_H
42 
43 //
44 //  W A R N I N G
45 //  -------------
46 //
47 // This file is not part of the Qt API.  It exists purely as an
48 // implementation detail.  This header file may change from version to
49 // version without notice, or even be removed.
50 //
51 // We mean it.
52 //
53 
54 #include <QtGui/private/qtguiglobal_p.h>
55 #include <QtCore/qsharedpointer.h>
56 #include <QtGui/qrgb.h>
57 #include <QtGui/qrgba64.h>
58 
59 #include <cmath>
60 
61 #if defined(__SSE2__)
62 #include <emmintrin.h>
63 #elif defined(__ARM_NEON__) || defined(__ARM_NEON)
64 #include <arm_neon.h>
65 #endif
66 
67 QT_BEGIN_NAMESPACE
68 
69 class QColorTransferFunction;
70 class QColorTransferTable;
71 
72 class Q_GUI_EXPORT QColorTrcLut : public QEnableSharedFromThis<QColorTrcLut>
73 {
74 public:
75     static QColorTrcLut *fromGamma(qreal gamma);
76     static QColorTrcLut *fromTransferFunction(const QColorTransferFunction &transfn);
77     static QColorTrcLut *fromTransferTable(const QColorTransferTable &transTable);
78 
79     // The following methods all convert opaque or unpremultiplied colors:
80 
toLinear64(QRgb rgb32)81     QRgba64 toLinear64(QRgb rgb32) const
82     {
83 #if defined(__SSE2__)
84         __m128i v = _mm_cvtsi32_si128(rgb32);
85         v = _mm_unpacklo_epi8(v, _mm_setzero_si128());
86         const __m128i vidx = _mm_slli_epi16(v, 4);
87         const int ridx = _mm_extract_epi16(vidx, 2);
88         const int gidx = _mm_extract_epi16(vidx, 1);
89         const int bidx = _mm_extract_epi16(vidx, 0);
90         v = _mm_slli_epi16(v, 8); // a * 256
91         v = _mm_insert_epi16(v, m_toLinear[ridx], 0);
92         v = _mm_insert_epi16(v, m_toLinear[gidx], 1);
93         v = _mm_insert_epi16(v, m_toLinear[bidx], 2);
94         v = _mm_add_epi16(v, _mm_srli_epi16(v, 8));
95         QRgba64 rgba64;
96         _mm_storel_epi64(reinterpret_cast<__m128i *>(&rgba64), v);
97         return rgba64;
98 #elif (defined(__ARM_NEON__) || defined(__ARM_NEON)) && Q_BYTE_ORDER == Q_LITTLE_ENDIAN
99         uint8x8_t v8 = vreinterpret_u8_u32(vmov_n_u32(rgb32));
100         uint16x4_t v16 = vget_low_u16(vmovl_u8(v8));
101         const uint16x4_t vidx = vshl_n_u16(v16, 4);
102         const int ridx = vget_lane_u16(vidx, 2);
103         const int gidx = vget_lane_u16(vidx, 1);
104         const int bidx = vget_lane_u16(vidx, 0);
105         v16 = vshl_n_u16(v16, 8); // a * 256
106         v16 = vset_lane_u16(m_toLinear[ridx], v16, 0);
107         v16 = vset_lane_u16(m_toLinear[gidx], v16, 1);
108         v16 = vset_lane_u16(m_toLinear[bidx], v16, 2);
109         v16 = vadd_u16(v16, vshr_n_u16(v16, 8));
110         return QRgba64::fromRgba64(vget_lane_u64(vreinterpret_u64_u16(v16), 0));
111 #else
112         uint r = m_toLinear[qRed(rgb32) << 4];
113         uint g = m_toLinear[qGreen(rgb32) << 4];
114         uint b = m_toLinear[qBlue(rgb32) << 4];
115         r = r + (r >> 8);
116         g = g + (g >> 8);
117         b = b + (b >> 8);
118         return QRgba64::fromRgba64(r, g, b, qAlpha(rgb32) * 257);
119 #endif
120     }
121 
toLinear(QRgb rgb32)122     QRgb toLinear(QRgb rgb32) const
123     {
124         return convertWithTable(rgb32, m_toLinear);
125     }
126 
toLinear(QRgba64 rgb64)127     QRgba64 toLinear(QRgba64 rgb64) const
128     {
129         return convertWithTable(rgb64, m_toLinear);
130     }
131 
u8ToLinearF32(int c)132     float u8ToLinearF32(int c) const
133     {
134         ushort v = m_toLinear[c << 4];
135         return v * (1.0f / (255*256));
136     }
137 
u16ToLinearF32(int c)138     float u16ToLinearF32(int c) const
139     {
140         c -= (c >> 8);
141         ushort v = m_toLinear[c >> 4];
142         return v * (1.0f / (255*256));
143     }
144 
toLinear(float f)145     float toLinear(float f) const
146     {
147         ushort v = m_toLinear[(int)(f * (255 * 16) + 0.5f)];
148         return v * (1.0f / (255*256));
149     }
150 
fromLinear64(QRgba64 rgb64)151     QRgb fromLinear64(QRgba64 rgb64) const
152     {
153 #if defined(__SSE2__)
154         __m128i v = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(&rgb64));
155         v = _mm_sub_epi16(v, _mm_srli_epi16(v, 8));
156         const __m128i vidx = _mm_srli_epi16(v, 4);
157         const int ridx = _mm_extract_epi16(vidx, 0);
158         const int gidx = _mm_extract_epi16(vidx, 1);
159         const int bidx = _mm_extract_epi16(vidx, 2);
160         v = _mm_insert_epi16(v, m_fromLinear[ridx], 2);
161         v = _mm_insert_epi16(v, m_fromLinear[gidx], 1);
162         v = _mm_insert_epi16(v, m_fromLinear[bidx], 0);
163         v = _mm_add_epi16(v, _mm_set1_epi16(0x80));
164         v = _mm_srli_epi16(v, 8);
165         v = _mm_packus_epi16(v, v);
166         return _mm_cvtsi128_si32(v);
167 #elif (defined(__ARM_NEON__) || defined(__ARM_NEON)) && Q_BYTE_ORDER == Q_LITTLE_ENDIAN
168         uint16x4_t v = vreinterpret_u16_u64(vmov_n_u64(rgb64));
169         v = vsub_u16(v, vshr_n_u16(v, 8));
170         const uint16x4_t vidx = vshr_n_u16(v, 4);
171         const int ridx = vget_lane_u16(vidx, 0);
172         const int gidx = vget_lane_u16(vidx, 1);
173         const int bidx = vget_lane_u16(vidx, 2);
174         v = vset_lane_u16(m_fromLinear[ridx], v, 2);
175         v = vset_lane_u16(m_fromLinear[gidx], v, 1);
176         v = vset_lane_u16(m_fromLinear[bidx], v, 0);
177         uint8x8_t v8 = vrshrn_n_u16(vcombine_u16(v, v), 8);
178         return vget_lane_u32(vreinterpret_u32_u8(v8), 0);
179 #else
180         uint a = rgb64.alpha();
181         uint r = rgb64.red();
182         uint g = rgb64.green();
183         uint b = rgb64.blue();
184         a = a - (a >> 8);
185         r = r - (r >> 8);
186         g = g - (g >> 8);
187         b = b - (b >> 8);
188         a = (a + 0x80) >> 8;
189         r = (m_fromLinear[r >> 4] + 0x80) >> 8;
190         g = (m_fromLinear[g >> 4] + 0x80) >> 8;
191         b = (m_fromLinear[b >> 4] + 0x80) >> 8;
192         return (a << 24) | (r << 16) | (g << 8) | b;
193 #endif
194     }
195 
fromLinear(QRgb rgb32)196     QRgb fromLinear(QRgb rgb32) const
197     {
198         return convertWithTable(rgb32, m_fromLinear);
199     }
200 
fromLinear(QRgba64 rgb64)201     QRgba64 fromLinear(QRgba64 rgb64) const
202     {
203         return convertWithTable(rgb64, m_fromLinear);
204     }
205 
u8FromLinearF32(float f)206     int u8FromLinearF32(float f) const
207     {
208         ushort v = m_fromLinear[(int)(f * (255 * 16) + 0.5f)];
209         return (v + 0x80) >> 8;
210     }
u16FromLinearF32(float f)211     int u16FromLinearF32(float f) const
212     {
213         ushort v = m_fromLinear[(int)(f * (255 * 16) + 0.5f)];
214         return v + (v >> 8);
215     }
fromLinear(float f)216     float fromLinear(float f) const
217     {
218         ushort v = m_fromLinear[(int)(f * (255 * 16) + 0.5f)];
219         return v * (1.0f / (255*256));
220     }
221 
222     // We translate to 0-65280 (255*256) instead to 0-65535 to make simple
223     // shifting an accurate conversion.
224     // We translate from 0-4080 (255*16) for the same speed up, and to keep
225     // the tables small enough to fit in most inner caches.
226     ushort m_toLinear[(255 * 16) + 1]; // [0-4080] -> [0-65280]
227     ushort m_fromLinear[(255 * 16) + 1]; // [0-4080] -> [0-65280]
228 
229 private:
QColorTrcLut()230     QColorTrcLut() { }
231 
convertWithTable(QRgb rgb32,const ushort * table)232     Q_ALWAYS_INLINE static QRgb convertWithTable(QRgb rgb32, const ushort *table)
233     {
234         const int r = (table[qRed(rgb32) << 4] + 0x80) >> 8;
235         const int g = (table[qGreen(rgb32) << 4] + 0x80) >> 8;
236         const int b = (table[qBlue(rgb32) << 4] + 0x80) >> 8;
237         return (rgb32 & 0xff000000) | (r << 16) | (g << 8) | b;
238     }
convertWithTable(QRgba64 rgb64,const ushort * table)239     Q_ALWAYS_INLINE static QRgba64 convertWithTable(QRgba64 rgb64, const ushort *table)
240     {
241 #if defined(__SSE2__)
242         __m128i v = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(&rgb64));
243         v = _mm_sub_epi16(v, _mm_srli_epi16(v, 8));
244         const __m128i vidx = _mm_srli_epi16(v, 4);
245         const int ridx = _mm_extract_epi16(vidx, 2);
246         const int gidx = _mm_extract_epi16(vidx, 1);
247         const int bidx = _mm_extract_epi16(vidx, 0);
248         v = _mm_insert_epi16(v, table[ridx], 2);
249         v = _mm_insert_epi16(v, table[gidx], 1);
250         v = _mm_insert_epi16(v, table[bidx], 0);
251         v = _mm_add_epi16(v, _mm_srli_epi16(v, 8));
252         QRgba64 rgba64;
253         _mm_storel_epi64(reinterpret_cast<__m128i *>(&rgba64), v);
254         return rgba64;
255 #elif (defined(__ARM_NEON__) || defined(__ARM_NEON)) && Q_BYTE_ORDER == Q_LITTLE_ENDIAN
256         uint16x4_t v = vreinterpret_u16_u64(vmov_n_u64(rgb64));
257         v = vsub_u16(v, vshr_n_u16(v, 8));
258         const uint16x4_t vidx = vshr_n_u16(v, 4);
259         const int ridx = vget_lane_u16(vidx, 2);
260         const int gidx = vget_lane_u16(vidx, 1);
261         const int bidx = vget_lane_u16(vidx, 0);
262         v = vset_lane_u16(table[ridx], v, 2);
263         v = vset_lane_u16(table[gidx], v, 1);
264         v = vset_lane_u16(table[bidx], v, 0);
265         v = vadd_u16(v, vshr_n_u16(v, 8));
266         return QRgba64::fromRgba64(vget_lane_u64(vreinterpret_u64_u16(v), 0));
267 #else
268         ushort r = rgb64.red();
269         ushort g = rgb64.green();
270         ushort b = rgb64.blue();
271         r = r - (r >> 8);
272         g = g - (g >> 8);
273         b = b - (b >> 8);
274         r = table[r >> 4];
275         g = table[g >> 4];
276         b = table[b >> 4];
277         r = r + (r >> 8);
278         g = g + (g >> 8);
279         b = b + (b >> 8);
280         return QRgba64::fromRgba64(r, g, b, rgb64.alpha());
281 #endif
282     }
283 };
284 
285 QT_END_NAMESPACE
286 
287 #endif // QCOLORTRCLUT_P_H
288