1 /*
2 Copyright 2014-2017 Jay Sorg
3
4 Permission to use, copy, modify, distribute, and sell this software and its
5 documentation for any purpose is hereby granted without fee, provided that
6 the above copyright notice appear in all copies and that both that
7 copyright notice and this permission notice appear in supporting
8 documentation.
9
10 The above copyright notice and this permission notice shall be included in
11 all copies or substantial portions of the Software.
12
13 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 OPEN GROUP BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
17 AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
18 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
19
rgb to yuv (a8r8g8b8 to nv12) speed testing
21
22 */
23
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <fcntl.h>
28 #include <unistd.h>
29 #include <sys/types.h>
30 #include <sys/stat.h>
31 #include <sys/time.h>
32
/* a8r8g8b8_to_nv12_box_accel names the SIMD implementation that is compared
   with the plain C version; it is only defined when USE_SIMD_AMD64 or
   USE_SIMD_X86 is defined. */
#ifdef USE_SIMD_AMD64
#define a8r8g8b8_to_nv12_box_accel a8r8g8b8_to_nv12_box_amd64_sse2
#endif

#ifdef USE_SIMD_X86
#define a8r8g8b8_to_nv12_box_accel a8r8g8b8_to_nv12_box_x86_sse2
#endif
40
41 /******************************************************************************/
42 //Y = ( ( 66 * R + 129 * G + 25 * B + 128) >> 8) + 16
43 //U = ( ( -38 * R - 74 * G + 112 * B + 128) >> 8) + 128
44 //V = ( ( 112 * R - 94 * G - 18 * B + 128) >> 8) + 128
45
46 //C = Y - 16
47 //D = U - 128
48 //E = V - 128
49 //R = clip(( 298 * C + 409 * E + 128) >> 8)
50 //G = clip(( 298 * C - 100 * D - 208 * E + 128) >> 8)
51 //B = clip(( 298 * C + 516 * D + 128) >> 8)
52
53 /******************************************************************************/
/* Clamp _val to the inclusive range [_lo, _hi].
   The whole expansion is parenthesized so the macro can be used inside a
   larger expression.  _val can be evaluated twice, so it must not have
   side effects. */
#define RDPCLAMP(_val, _lo, _hi) \
    ((_val) < (_lo) ? (_lo) : ((_val) > (_hi) ? (_hi) : (_val)))
56
/* Floating point variant.  Despite the name, this converts RGB to YUV:
   _R, _G and _B are read and the results are stored in _Y, _U and _V.
   Arguments are parenthesized so expressions can be passed, and the body is
   a single statement so the macro is safe under an unbraced if/else. */
#define YUV2RGB1(_Y, _U, _V, _R, _G, _B) \
    do \
    { \
        (_Y) =  (0.257 * (_R)) + (0.504 * (_G)) + (0.098 * (_B)) + 16; \
        (_U) = -(0.148 * (_R)) - (0.291 * (_G)) + (0.439 * (_B)) + 128; \
        (_V) =  (0.439 * (_R)) - (0.368 * (_G)) - (0.071 * (_B)) + 128; \
    } while (0)
62
/* Integer RGB to YUV variant (the name is historical) using coefficients
   scaled by 4096, e.g. 1053 / 4096 is about 0.257; the sum is shifted right
   by 12 before the offset is added.
   Arguments are parenthesized so expressions can be passed, and the body is
   a single statement so the macro is safe under an unbraced if/else. */
#define YUV2RGB3(_Y, _U, _V, _R, _G, _B) \
    do \
    { \
        (_Y) = (( 1053 * (_R) + 2064 * (_G) +  401 * (_B)) >> 12) +  16; \
        (_U) = (( -606 * (_R) - 1192 * (_G) + 1798 * (_B)) >> 12) + 128; \
        (_V) = (( 1798 * (_R) - 1507 * (_G) -  291 * (_B)) >> 12) + 128; \
    } while (0)
67
/* Integer RGB to YUV variant (the name is historical) using coefficients
   scaled by 65536, e.g. 16843 / 65536 is about 0.257; the sum is shifted
   right by 16 before the offset is added.
   Arguments are parenthesized so expressions can be passed, and the body is
   a single statement so the macro is safe under an unbraced if/else. */
#define YUV2RGB2(_Y, _U, _V, _R, _G, _B) \
    do \
    { \
        (_Y) = (( 16843 * (_R) + 33030 * (_G) +  6423 * (_B)) >> 16) +  16; \
        (_U) = (( -9699 * (_R) - 19071 * (_G) + 28770 * (_B)) >> 16) + 128; \
        (_V) = (( 28770 * (_R) - 24117 * (_G) -  4653 * (_B)) >> 16) + 128; \
    } while (0)
72
/* Original 8 bit fixed point formula, the one used by a8r8g8b8_to_nv12_box.
   Despite the name, this converts RGB to YUV: _R, _G and _B are read and
   the results are stored in _Y, _U and _V.  128 is added before the shift
   by 8 so the result is rounded rather than truncated.
   Arguments are parenthesized so expressions can be passed, and the body is
   a single statement so the macro is safe under an unbraced if/else. */
#define YUV2RGB(_Y, _U, _V, _R, _G, _B) \
    do \
    { \
        (_Y) = (( 66 * (_R) + 129 * (_G) +  25 * (_B) + 128) >> 8) +  16; \
        (_U) = ((-38 * (_R) -  74 * (_G) + 112 * (_B) + 128) >> 8) + 128; \
        (_V) = ((112 * (_R) -  94 * (_G) -  18 * (_B) + 128) >> 8) + 128; \
    } while (0)
78
/* Integer RGB to YUV variant (the name is historical) that scales every
   term on its own: each input is shifted left by 4, multiplied by a
   coefficient and shifted right by 16, so every product is truncated
   separately before the terms are combined.
   The body is a single statement so the macro is safe under an unbraced
   if/else. */
#define YUV2RGB4(_Y, _U, _V, _R, _G, _B) \
    do \
    { \
        (_Y) = ( ((1053 * ((_R) << 4)) >> 16) + ((2064 * ((_G) << 4)) >> 16) + (( 401 * ((_B) << 4)) >> 16)) +  16; \
        (_U) = ( ((1798 * ((_B) << 4)) >> 16) - (( 606 * ((_R) << 4)) >> 16) - ((1192 * ((_G) << 4)) >> 16)) + 128; \
        (_V) = ( ((1798 * ((_R) << 4)) >> 16) - ((1507 * ((_G) << 4)) >> 16) - (( 291 * ((_B) << 4)) >> 16)) + 128; \
    } while (0)
83
84 /******************************************************************************/
/* Convert one a8r8g8b8 pixel with the YUV2RGB macro (which, despite its
   name, maps R, G, B to Y, U, V).  The clamped Y is stored in *y_out and
   the clamped U and V are added to *u_sum and *v_sum. */
static void
a8r8g8b8_pixel_to_yuv(int pixel, char *y_out, int *u_sum, int *v_sum)
{
    int R;
    int G;
    int B;
    int Y;
    int U;
    int V;

    /* the top byte (alpha) is ignored */
    R = (pixel >> 16) & 0xff;
    G = (pixel >> 8) & 0xff;
    B = (pixel >> 0) & 0xff;
    YUV2RGB(Y, U, V, R, G, B);
    *y_out = RDPCLAMP(Y, 0, 255);
    *u_sum += RDPCLAMP(U, 0, 255);
    *v_sum += RDPCLAMP(V, 0, 255);
}

/* Plain C a8r8g8b8 to NV12 conversion.
   s8 / src_stride: source pixels, read as one int per pixel, and the
   distance in bytes between source rows.  Each row start is accessed
   through an int pointer, so it has to be suitably aligned for that.
   d8_y / dst_stride_y: destination for one Y byte per pixel.
   d8_uv / dst_stride_uv: destination for one interleaved U, V byte pair per
   2x2 block of pixels; each value is the rounded average of the four
   pixels in the block.
   width / height: size in pixels; both are rounded down to an even number,
   so a trailing odd column or row is not converted.
   Always returns 0. */
static int
a8r8g8b8_to_nv12_box(char *s8, int src_stride,
                     char *d8_y, int dst_stride_y,
                     char *d8_uv, int dst_stride_uv,
                     int width, int height)
{
    int index;
    int jndex;
    int U_sum;
    int V_sum;
    int lwidth;
    int lheight;
    int *s32a;
    int *s32b;
    char *d8ya;
    char *d8yb;
    char *d8uv;

    /* must be even */
    lwidth = width & ~1;
    lheight = height & ~1;
    for (jndex = 0; jndex < lheight; jndex += 2)
    {
        /* a and b are the upper and lower row of the current row pair */
        s32a = (int *) (s8 + src_stride * jndex);
        s32b = (int *) (s8 + src_stride * (jndex + 1));
        d8ya = d8_y + dst_stride_y * jndex;
        d8yb = d8_y + dst_stride_y * (jndex + 1);
        d8uv = d8_uv + dst_stride_uv * (jndex / 2);
        for (index = 0; index < lwidth; index += 2)
        {
            U_sum = 0;
            V_sum = 0;

            a8r8g8b8_pixel_to_yuv(s32a[0], d8ya + 0, &U_sum, &V_sum);
            a8r8g8b8_pixel_to_yuv(s32a[1], d8ya + 1, &U_sum, &V_sum);
            a8r8g8b8_pixel_to_yuv(s32b[0], d8yb + 0, &U_sum, &V_sum);
            a8r8g8b8_pixel_to_yuv(s32b[1], d8yb + 1, &U_sum, &V_sum);
            s32a += 2;
            s32b += 2;
            d8ya += 2;
            d8yb += 2;

            /* + 2 rounds the average of the four samples to nearest */
            d8uv[0] = (U_sum + 2) / 4;
            d8uv[1] = (V_sum + 2) / 4;
            d8uv += 2;
        }
    }

    return 0;
}
178
/* Called by main() when the program is started without arguments.
   It currently prints nothing; the return value (always 0) becomes the
   exit status of the program. */
int
output_params(void)
{
    const int status = 0;

    return status;
}
183
/* Print len bytes starting at p to stdout as a hex dump, 16 bytes per line.
   Each line holds the offset as four hex digits, the bytes as two hex
   digits each, and then the same bytes as characters, with '.' standing in
   for anything outside 0x20..0x7e.  Nothing is printed when len <= 0. */
void hexdump(const void* p, int len)
{
    const unsigned char* line;
    int i;
    int thisline;
    int offset;

    line = (const unsigned char *)p;
    offset = 0;

    while (offset < len)
    {
        thisline = len - offset;

        if (thisline > 16)
        {
            thisline = 16;
        }

        printf("%04x ", offset);

        for (i = 0; i < thisline; i++)
        {
            printf("%02x ", line[i]);
        }

        /* every byte takes three columns ("xx "), so a short last line is
           padded with three spaces per missing byte to keep the character
           column aligned with the lines above */
        for (; i < 16; i++)
        {
            printf("   ");
        }

        for (i = 0; i < thisline; i++)
        {
            printf("%c", (line[i] >= 0x20 && line[i] < 0x7f) ? line[i] : '.');
        }

        printf("\n");
        offset += thisline;
        line += thisline;
    }
}
224
/* Compare the first `bytes` bytes of data1 and data2.
   Returns 0 when they are all equal.  Otherwise the index of the first
   differing byte is stored in *offset and 1 is returned.  *offset is not
   written when the buffers match (or when bytes <= 0). */
int lmemcmp(const void* data1, const void* data2, int bytes, int* offset)
{
    const unsigned char* lhs = (const unsigned char*)data1;
    const unsigned char* rhs = (const unsigned char*)data2;
    int pos = 0;

    while (pos < bytes)
    {
        if (lhs[pos] != rhs[pos])
        {
            *offset = pos;
            return 1;
        }
        pos++;
    }
    return 0;
}
246
/* Return the current time of day in milliseconds as a non-negative int.
   The millisecond count does not fit in 32 bits, so it is computed in a
   wider type and only its low 31 bits are returned.  The value therefore
   wraps roughly every 24.8 days; the difference between two results is the
   elapsed time in milliseconds unless a wrap happens between the samples. */
int get_mstime(void)
{
    struct timeval tp;
    long long ms;

    gettimeofday(&tp, NULL);
    /* widen before scaling so tv_sec * 1000 cannot overflow */
    ms = (long long) tp.tv_sec * 1000 + tp.tv_usec / 1000;
    /* masking keeps the conversion to int well defined */
    return (int) (ms & 0x7fffffff);
}
254
/* SSE2 implementations; they are declared here but not defined in this
   file.  Both take the same arguments as a8r8g8b8_to_nv12_box, and main()
   calls one of them through a8r8g8b8_to_nv12_box_accel. */
extern int a8r8g8b8_to_nv12_box_x86_sse2(char *s8, int src_stride,
                                         char *d8_y, int dst_stride_y,
                                         char *d8_uv, int dst_stride_uv,
                                         int width, int height);

extern int a8r8g8b8_to_nv12_box_amd64_sse2(char *s8, int src_stride,
                                           char *d8_y, int dst_stride_y,
                                           char *d8_uv, int dst_stride_uv,
                                           int width, int height);
265
/* Round _ptr up to the next 16 byte boundary and return it as char *.
   The result can be up to 15 bytes past _ptr, so the buffer needs that much
   spare room.  _ptr is parenthesized so pointer expressions such as
   AL(p + 1) are converted as a whole, and the mask is built in size_t so it
   covers the full width of the address. */
#define AL(_ptr) ((char *) ((((size_t) (_ptr)) + 15) & ~((size_t) 15)))
267
/* Read exactly `bytes` bytes from fd into dst, calling read() again after a
   short read.  Returns 0 on success and 1 on a read error or end of file. */
static int
read_fully(int fd, char *dst, int bytes)
{
    int done;
    ssize_t got;

    done = 0;
    while (done < bytes)
    {
        got = read(fd, dst + done, bytes - done);
        if (got <= 0)
        {
            return 1;
        }
        done += (int) got;
    }
    return 0;
}

/* Benchmark the C conversion against the SIMD one on a 1920x1080 frame of
   random pixels and check that both produce the same NV12 output.
   Without arguments only output_params() is called.  Otherwise the exit
   status is 0 when the outputs match and 1 when they differ or when the
   test data could not be set up. */
int main(int argc, char** argv)
{
    enum
    {
        WIDTH = 1920,
        HEIGHT = 1080,
        RUNS = 100,
        RGB_BYTES = WIDTH * HEIGHT * 4,
        YUV_ALLOC_BYTES = WIDTH * HEIGHT * 2,
        NV12_BYTES = WIDTH * HEIGHT * 3 / 2
    };
    int index;
    int offset;
    int fd;
    int read_failed;
    int stime;
    int etime;
    int ret = 1;
    char* rgb_data;
    char* yuv_data1;
    char* yuv_data2;
    char* al_rgb_data;
    char* al_yuv_data1;
    char* al_yuv_data2;

    (void) argv;
    if (argc == 1)
    {
        return output_params();
    }

    /* 16 extra bytes leave room for AL() to round the pointers up */
    rgb_data = malloc(RGB_BYTES + 16);
    yuv_data1 = malloc(YUV_ALLOC_BYTES + 16);
    yuv_data2 = malloc(YUV_ALLOC_BYTES + 16);
    if (rgb_data == NULL || yuv_data1 == NULL || yuv_data2 == NULL)
    {
        printf("error: out of memory\n");
        goto cleanup;
    }
    al_rgb_data = AL(rgb_data);
    al_yuv_data1 = AL(yuv_data1);
    al_yuv_data2 = AL(yuv_data2);

    fd = open("/dev/urandom", O_RDONLY);
    if (fd < 0)
    {
        printf("error: cannot open /dev/urandom\n");
        goto cleanup;
    }
    read_failed = read_fully(fd, al_rgb_data, RGB_BYTES);
    close(fd);
    if (read_failed)
    {
        printf("error: cannot read /dev/urandom\n");
        goto cleanup;
    }

    stime = get_mstime();
    for (index = 0; index < RUNS; index++)
    {
        a8r8g8b8_to_nv12_box(al_rgb_data, WIDTH * 4,
                             al_yuv_data1, WIDTH,
                             al_yuv_data1 + WIDTH * HEIGHT,
                             WIDTH, WIDTH, HEIGHT);
    }
    etime = get_mstime();
    printf("a8r8g8b8_to_nv12_box took %d\n", etime - stime);

    stime = get_mstime();
    for (index = 0; index < RUNS; index++)
    {
        a8r8g8b8_to_nv12_box_accel(al_rgb_data, WIDTH * 4,
                                   al_yuv_data2, WIDTH,
                                   al_yuv_data2 + WIDTH * HEIGHT, WIDTH,
                                   WIDTH, HEIGHT);
    }
    etime = get_mstime();
    printf("a8r8g8b8_to_nv12_box_x86_sse2 took %d\n", etime - stime);

    /* the Y plane plus the half height UV plane */
    if (lmemcmp(al_yuv_data1, al_yuv_data2, NV12_BYTES, &offset) != 0)
    {
        printf("no match at offset %d\n", offset);
        printf("first\n");
        hexdump(al_yuv_data1 + offset, 16);
        printf("second\n");
        hexdump(al_yuv_data2 + offset, 16);
    }
    else
    {
        ret = 0;
        printf("match\n");
    }

cleanup:
    free(rgb_data);
    free(yuv_data1);
    free(yuv_data2);
    return ret;
}
340