////////////////////////////////////////////////////////////////
//
// This code was taken from http://shibatch.sourceforge.net/
// Many thanks to the author of the original version: Naoki Shibata
//
// This version contains modifications made by Ingo Weyrich
//
////////////////////////////////////////////////////////////////

#ifndef __SSE2__
#error Please specify -msse2.
#endif

#ifdef __GNUC__
#define INLINE __inline
#else
#define INLINE inline
#endif

#include <x86intrin.h>

#include <stdint.h>
#include <math.h> // for INFINITY, used by the vmask_isinf family below

typedef __m128d vdouble;
typedef __m128i vint;
typedef __m128i vmask;

typedef __m128 vfloat;
typedef __m128i vint2;

//
#ifdef __GNUC__
#if ((__GNUC__ == 4 && __GNUC_MINOR__ >= 9) || __GNUC__ > 4) && (!defined(WIN32) || defined( __x86_64__ ))
#define LVF(x) _mm_load_ps((float*)&x)
#define LVFU(x) _mm_loadu_ps(&x)
#define STVF(x,y) _mm_store_ps(&x,y)
#define STVFU(x,y) _mm_storeu_ps(&x,y)
#define LVI(x) _mm_load_si128((__m128i*)&x)
#else // there is a bug in gcc 4.7.x when using openmp and aligned memory and -O3; also map the aligned functions to unaligned ones for WIN32 builds
#define LVF(x) _mm_loadu_ps((float*)&x)
#define LVFU(x) _mm_loadu_ps(&x)
#define STVF(x,y) _mm_storeu_ps(&x,y)
#define STVFU(x,y) _mm_storeu_ps(&x,y)
#define LVI(x) _mm_loadu_si128((__m128i*)&x)
#endif
#else
#define LVF(x) _mm_load_ps((float*)&x)
#define LVFU(x) _mm_loadu_ps(&x)
#define STVF(x,y) _mm_store_ps(&x,y)
#define STVFU(x,y) _mm_storeu_ps(&x,y)
#define LVI(x) _mm_load_si128((__m128i*)&x)
#endif

#if defined(__x86_64__) && defined(__AVX__)
#define PERMUTEPS(a,mask) _mm_permute_ps(a,mask)
#else
#define PERMUTEPS(a,mask) _mm_shuffle_ps(a,a,mask)
#endif

static INLINE vfloat LC2VFU(float &a)
{
    // Load 8 floats from a and combine a[0],a[2],a[4] and a[6] into a vector of 4 floats
    vfloat a1 = _mm_loadu_ps( &a );
    vfloat a2 = _mm_loadu_ps( (&a) + 4 );
    return _mm_shuffle_ps(a1,a2,_MM_SHUFFLE( 2,0,2,0 ));
}
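
// Usage sketch (illustrative, not from the original source): LC2VFU gathers
// the even-indexed elements of an interleaved buffer, e.g.
//     float buf[8] = {x0, y0, x1, y1, x2, y2, x3, y3};
//     vfloat vx = LC2VFU(buf[0]); // vx = {x0, x1, x2, x3}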


// Store a vector of 4 floats in a[0],a[2],a[4] and a[6]
#if defined(__x86_64__) && defined(__SSE4_1__)
// SSE4.1 => use _mm_blend_ps instead of _mm_set_epi32 and vself
#define STC2VFU(a,v) {\
    __m128 TST1V = _mm_loadu_ps(&a);\
    __m128 TST2V = _mm_unpacklo_ps(v,v);\
    _mm_storeu_ps(&a, _mm_blend_ps(TST1V,TST2V,5));\
    TST1V = _mm_loadu_ps((&a)+4);\
    TST2V = _mm_unpackhi_ps(v,v);\
    _mm_storeu_ps((&a)+4, _mm_blend_ps(TST1V,TST2V,5));\
}
#else
#define STC2VFU(a,v) {\
    __m128 TST1V = _mm_loadu_ps(&a);\
    __m128 TST2V = _mm_unpacklo_ps(v,v);\
    vmask cmask = _mm_set_epi32(0xffffffff,0,0xffffffff,0);\
    _mm_storeu_ps(&a, vself(cmask,TST1V,TST2V));\
    TST1V = _mm_loadu_ps((&a)+4);\
    TST2V = _mm_unpackhi_ps(v,v);\
    _mm_storeu_ps((&a)+4, vself(cmask,TST1V,TST2V));\
}
#endif
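
// Usage sketch (illustrative): STC2VFU is the store counterpart of LC2VFU. It
// writes the vector lanes to a[0],a[2],a[4],a[6] and leaves the odd-indexed
// elements untouched, so the two macros round-trip one channel of interleaved data:
//     vfloat vx = LC2VFU(buf[0]);            // read x0..x3 from the even slots
//     STC2VFU(buf[0], vmulf(vx, F2V(0.5f))); // write scaled values back to them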

#define ZEROV _mm_setzero_ps()
#define F2V(a) _mm_set1_ps((a))

static INLINE vint vrint_vi_vd(vdouble vd)
{
    return _mm_cvtpd_epi32(vd);
}
static INLINE vint vtruncate_vi_vd(vdouble vd)
{
    return _mm_cvttpd_epi32(vd);
}
static INLINE vdouble vcast_vd_vi(vint vi)
{
    return _mm_cvtepi32_pd(vi);
}
static INLINE vdouble vcast_vd_d(double d)
{
    return _mm_set_pd(d, d);
}
static INLINE vint vcast_vi_i(int i)
{
    return _mm_set_epi32(0, 0, i, i);
}

static INLINE vmask vreinterpret_vm_vd(vdouble vd)
{
    return (__m128i)vd;
}
static INLINE vdouble vreinterpret_vd_vm(vmask vm)
{
    return (__m128d)vm;
}

static INLINE vmask vreinterpret_vm_vf(vfloat vf)
{
    return (__m128i)vf;
}
static INLINE vfloat vreinterpret_vf_vm(vmask vm)
{
    return (__m128)vm;
}

//

static INLINE vfloat vcast_vf_f(float f)
{
    return _mm_set_ps(f, f, f, f);
}

// Don't use intrinsics here. Newer gcc versions (>= 4.9, maybe also earlier) generate better code when not using intrinsics.
// Example: vaddf(vmulf(a,b),c) will generate an FMA instruction, when built for chips with that feature, only if vaddf and vmulf don't use intrinsics.
static INLINE vfloat vaddf(vfloat x, vfloat y)
{
    return x + y;
}
static INLINE vfloat vsubf(vfloat x, vfloat y)
{
    return x - y;
}
static INLINE vfloat vmulf(vfloat x, vfloat y)
{
    return x * y;
}
static INLINE vfloat vdivf(vfloat x, vfloat y)
{
    return x / y;
}
// Also don't use intrinsics here: some chips support FMA instructions with 3 and 4 operands.
// 3 operands: a = a*b+c, b = a*b+c, c = a*b+c // destination has to be one of a,b,c
// 4 operands: d = a*b+c // destination does not have to be one of a,b,c
// gcc will use whichever fits best when not using intrinsics; with intrinsics that's not possible.
static INLINE vfloat vmlaf(vfloat x, vfloat y, vfloat z)
{
    return x * y + z;
}
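
// Illustrative note: per the comments above, a call like
//     vfloat r = vmlaf(a, b, c); // r = a*b + c
// leaves the compiler free to emit a single fused multiply-add on FMA-capable
// targets, while explicit _mm_mul_ps/_mm_add_ps intrinsics would not.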
static INLINE vfloat vrecf(vfloat x)
{
    return vdivf(vcast_vf_f(1.0f), x);
}
static INLINE vfloat vsqrtf(vfloat x)
{
    return _mm_sqrt_ps(x);
}
static INLINE vfloat vmaxf(vfloat x, vfloat y)
{
    return _mm_max_ps(x, y);
}
static INLINE vfloat vminf(vfloat x, vfloat y)
{
    return _mm_min_ps(x, y);
}

//

static INLINE vdouble vadd(vdouble x, vdouble y)
{
    return _mm_add_pd(x, y);
}
static INLINE vdouble vsub(vdouble x, vdouble y)
{
    return _mm_sub_pd(x, y);
}
static INLINE vdouble vmul(vdouble x, vdouble y)
{
    return _mm_mul_pd(x, y);
}
static INLINE vdouble vdiv(vdouble x, vdouble y)
{
    return _mm_div_pd(x, y);
}
static INLINE vdouble vrec(vdouble x)
{
    return _mm_div_pd(_mm_set_pd(1, 1), x);
}
static INLINE vdouble vsqrt(vdouble x)
{
    return _mm_sqrt_pd(x);
}
static INLINE vdouble vmla(vdouble x, vdouble y, vdouble z)
{
    return vadd(vmul(x, y), z);
}

static INLINE vdouble vmax(vdouble x, vdouble y)
{
    return _mm_max_pd(x, y);
}
static INLINE vdouble vmin(vdouble x, vdouble y)
{
    return _mm_min_pd(x, y);
}

static INLINE vdouble vabs(vdouble d)
{
    return (__m128d)_mm_andnot_pd(_mm_set_pd(-0.0, -0.0), d);
}
static INLINE vdouble vneg(vdouble d)
{
    return (__m128d)_mm_xor_pd(_mm_set_pd(-0.0, -0.0), d);
}

//

static INLINE vint vaddi(vint x, vint y)
{
    return _mm_add_epi32(x, y);
}
static INLINE vint vsubi(vint x, vint y)
{
    return _mm_sub_epi32(x, y);
}

static INLINE vint vandi(vint x, vint y)
{
    return _mm_and_si128(x, y);
}
static INLINE vint vandnoti(vint x, vint y)
{
    return _mm_andnot_si128(x, y);
}
static INLINE vint vori(vint x, vint y)
{
    return _mm_or_si128(x, y);
}
static INLINE vint vxori(vint x, vint y)
{
    return _mm_xor_si128(x, y);
}

static INLINE vint vslli(vint x, int c)
{
    return _mm_slli_epi32(x, c);
}
static INLINE vint vsrli(vint x, int c)
{
    return _mm_srli_epi32(x, c);
}
static INLINE vint vsrai(vint x, int c)
{
    return _mm_srai_epi32(x, c);
}

//

static INLINE vmask vandm(vmask x, vmask y)
{
    return _mm_and_si128(x, y);
}
static INLINE vmask vandnotm(vmask x, vmask y)
{
    return _mm_andnot_si128(x, y);
}
static INLINE vmask vorm(vmask x, vmask y)
{
    return _mm_or_si128(x, y);
}
static INLINE vmask vxorm(vmask x, vmask y)
{
    return _mm_xor_si128(x, y);
}
static INLINE vmask vnotm(vmask x)
{
    return _mm_xor_si128(x, _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128()));
}

static INLINE vmask vmask_eq(vdouble x, vdouble y)
{
    return (__m128i)_mm_cmpeq_pd(x, y);
}
static INLINE vmask vmask_neq(vdouble x, vdouble y)
{
    return (__m128i)_mm_cmpneq_pd(x, y);
}
static INLINE vmask vmask_lt(vdouble x, vdouble y)
{
    return (__m128i)_mm_cmplt_pd(x, y);
}
static INLINE vmask vmask_le(vdouble x, vdouble y)
{
    return (__m128i)_mm_cmple_pd(x, y);
}
static INLINE vmask vmask_gt(vdouble x, vdouble y)
{
    return (__m128i)_mm_cmpgt_pd(x, y);
}
static INLINE vmask vmask_ge(vdouble x, vdouble y)
{
    return (__m128i)_mm_cmpge_pd(x, y);
}

static INLINE vmask vmaskf_eq(vfloat x, vfloat y)
{
    return (__m128i)_mm_cmpeq_ps(x, y);
}
static INLINE vmask vmaskf_neq(vfloat x, vfloat y)
{
    return (__m128i)_mm_cmpneq_ps(x, y);
}
static INLINE vmask vmaskf_lt(vfloat x, vfloat y)
{
    return (__m128i)_mm_cmplt_ps(x, y);
}
static INLINE vmask vmaskf_le(vfloat x, vfloat y)
{
    return (__m128i)_mm_cmple_ps(x, y);
}
static INLINE vmask vmaskf_gt(vfloat x, vfloat y)
{
    return (__m128i)_mm_cmpgt_ps(x, y);
}
static INLINE vmask vmaskf_ge(vfloat x, vfloat y)
{
    return (__m128i)_mm_cmpge_ps(x, y);
}

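// vmaski_eq compares the two 32-bit ints that accompany a vdouble and widens
// each 32-bit comparison result to a full 64-bit lane via the shuffle, so the
// returned mask lines up with the two double lanes.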
static INLINE vmask vmaski_eq(vint x, vint y)
{
    __m128 s = (__m128)_mm_cmpeq_epi32(x, y);
    return (__m128i)_mm_shuffle_ps(s, s, _MM_SHUFFLE(1, 1, 0, 0));
}

static INLINE vdouble vsel(vmask mask, vdouble x, vdouble y)
{
    return (__m128d)vorm(vandm(mask, (__m128i)x), vandnotm(mask, (__m128i)y));
}

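// vseli_lt(d0, d1, x, y) selects x where d0 < d1 and y elsewhere: the double
// comparison mask is narrowed with _mm_cvtpd_ps (an all-ones lane is a NaN,
// which compares unequal to 0), so only the two low int lanes are meaningful,
// matching the vint convention for values paired with a vdouble.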
static INLINE vint vseli_lt(vdouble d0, vdouble d1, vint x, vint y)
{
    vmask mask = (vmask)_mm_cmpeq_ps(_mm_cvtpd_ps((vdouble)vmask_lt(d0, d1)), _mm_set_ps(0, 0, 0, 0));
    return vori(vandnoti(mask, x), vandi(mask, y));
}

//

static INLINE vint2 vcast_vi2_vm(vmask vm)
{
    return (vint2)vm;
}
static INLINE vmask vcast_vm_vi2(vint2 vi)
{
    return (vmask)vi;
}

static INLINE vint2 vrint_vi2_vf(vfloat vf)
{
    return _mm_cvtps_epi32(vf);
}
static INLINE vint2 vtruncate_vi2_vf(vfloat vf)
{
    return _mm_cvttps_epi32(vf);
}
static INLINE vfloat vcast_vf_vi2(vint2 vi)
{
    return _mm_cvtepi32_ps(vcast_vm_vi2(vi));
}
static INLINE vint2 vcast_vi2_i(int i)
{
    return _mm_set_epi32(i, i, i, i);
}

static INLINE vint2 vaddi2(vint2 x, vint2 y)
{
    return vaddi(x, y);
}
static INLINE vint2 vsubi2(vint2 x, vint2 y)
{
    return vsubi(x, y);
}

static INLINE vint2 vandi2(vint2 x, vint2 y)
{
    return vandi(x, y);
}
static INLINE vint2 vandnoti2(vint2 x, vint2 y)
{
    return vandnoti(x, y);
}
static INLINE vint2 vori2(vint2 x, vint2 y)
{
    return vori(x, y);
}
static INLINE vint2 vxori2(vint2 x, vint2 y)
{
    return vxori(x, y);
}

static INLINE vint2 vslli2(vint2 x, int c)
{
    return vslli(x, c);
}
static INLINE vint2 vsrli2(vint2 x, int c)
{
    return vsrli(x, c);
}
static INLINE vint2 vsrai2(vint2 x, int c)
{
    return vsrai(x, c);
}

static INLINE vmask vmaski2_eq(vint2 x, vint2 y)
{
    return _mm_cmpeq_epi32(x, y);
}
static INLINE vint2 vseli2(vmask m, vint2 x, vint2 y)
{
    return vorm(vandm(m, x), vandnotm(m, y));
}

//

static INLINE double vcast_d_vd(vdouble v)
{
    double s[2];
    _mm_storeu_pd(s, v);
    return s[0];
}

static INLINE float vcast_f_vf(vfloat v)
{
    float s[4];
    _mm_storeu_ps(s, v);
    return s[0];
}

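// The helpers below work on the IEEE-754 bit patterns of doubles: vsignbit
// isolates the sign bit of each lane, vsign yields copysign(1.0, d), and
// vmulsign flips the sign of x wherever y is negative.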
static INLINE vmask vsignbit(vdouble d)
{
    return _mm_and_si128((__m128i)d, _mm_set_epi32(0x80000000, 0x0, 0x80000000, 0x0));
}

static INLINE vdouble vsign(vdouble d)
{
    return (__m128d)_mm_or_si128((__m128i)_mm_set_pd(1, 1), _mm_and_si128((__m128i)d, _mm_set_epi32(0x80000000, 0x0, 0x80000000, 0x0)));
}

static INLINE vdouble vmulsign(vdouble x, vdouble y)
{
    return (__m128d)vxori((__m128i)x, vsignbit(y));
}

static INLINE vmask vmask_isinf(vdouble d)
{
    return (vmask)_mm_cmpeq_pd(vabs(d), _mm_set_pd(INFINITY, INFINITY));
}

static INLINE vmask vmask_ispinf(vdouble d)
{
    return (vmask)_mm_cmpeq_pd(d, _mm_set_pd(INFINITY, INFINITY));
}

static INLINE vmask vmask_isminf(vdouble d)
{
    return (vmask)_mm_cmpeq_pd(d, _mm_set_pd(-INFINITY, -INFINITY));
}

static INLINE vmask vmask_isnan(vdouble d)
{
    return (vmask)_mm_cmpneq_pd(d, d);
}

static INLINE vdouble visinf(vdouble d)
{
    return (__m128d)_mm_and_si128(vmask_isinf(d), _mm_or_si128(vsignbit(d), (__m128i)_mm_set_pd(1, 1)));
}

static INLINE vdouble visinf2(vdouble d, vdouble m)
{
    return (__m128d)_mm_and_si128(vmask_isinf(d), _mm_or_si128(vsignbit(d), (__m128i)m));
}

//

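// vpow2i builds 2^q directly in the exponent field: q is biased by 0x3ff, the
// shuffle moves the two biased exponents into the high 32 bits of each double
// lane, and the shift by 20 places them at bit 52, the exponent position.
// E.g. vpow2i(vcast_vi_i(3)) yields {8.0, 8.0}.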
static INLINE vdouble vpow2i(vint q)
{
    q = _mm_add_epi32(_mm_set_epi32(0x0, 0x0, 0x3ff, 0x3ff), q);
    q = (__m128i)_mm_shuffle_ps((__m128)q, (__m128)q, _MM_SHUFFLE(1, 3, 0, 3));
    return (__m128d)_mm_slli_epi32(q, 20);
}

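// vldexp computes x * 2^q while keeping every intermediate scale factor inside
// the representable exponent range: q is split into m (roughly q/4, via the
// sign-correct shift sequence) and a remainder r = q - 4*m, and the result is
// accumulated as x * (2^m)^4 * 2^r.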
static INLINE vdouble vldexp(vdouble x, vint q)
{
    vint m = _mm_srai_epi32(q, 31);
    m = _mm_slli_epi32(_mm_sub_epi32(_mm_srai_epi32(_mm_add_epi32(m, q), 9), m), 7);
    q = _mm_sub_epi32(q, _mm_slli_epi32(m, 2));
    vdouble y = vpow2i(m);
    return vmul(vmul(vmul(vmul(vmul(x, y), y), y), y), vpow2i(q));
}

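// vilogbp1 returns ilogb(d) + 1 by extracting the biased exponent and
// subtracting 0x3fe. Inputs below 2^-300 (~4.9e-91) are first rescaled by
// 2^300 so that denormals get a usable exponent; 300 is subtracted again
// from the result on that path.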
static INLINE vint vilogbp1(vdouble d)
{
    vint m = vmask_lt(d, vcast_vd_d(4.9090934652977266E-91));
    d = vsel(m, vmul(vcast_vd_d(2.037035976334486E90), d), d);
    __m128i q = _mm_and_si128((__m128i)d, _mm_set_epi32(((1 << 12) - 1) << 20, 0, ((1 << 12) - 1) << 20, 0));
    q = _mm_srli_epi32(q, 20);
    q = vorm(vandm   (m, _mm_sub_epi32(q, _mm_set_epi32(300 + 0x3fe, 0, 300 + 0x3fe, 0))),
             vandnotm(m, _mm_sub_epi32(q, _mm_set_epi32(      0x3fe, 0,       0x3fe, 0))));
    q = (__m128i)_mm_shuffle_ps((__m128)q, (__m128)q, _MM_SHUFFLE(0, 0, 3, 1));
    return q;
}

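// vupper clears the low 27 mantissa bits of each double, giving the "high"
// half of a Dekker-style split: d - vupper(d) holds the remaining low-order
// bits exactly.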
static INLINE vdouble vupper(vdouble d)
{
    return (__m128d)_mm_and_si128((__m128i)d, _mm_set_epi32(0xffffffff, 0xf8000000, 0xffffffff, 0xf8000000));
}

//

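// A vdouble2 holds each value as an unevaluated sum x + y ("double-double"):
// x carries the leading digits and y the trailing error term, giving roughly
// twice the precision of a plain double.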
typedef struct {
    vdouble x, y;
} vdouble2;

static INLINE vdouble2 dd(vdouble h, vdouble l)
{
    vdouble2 ret = {h, l};
    return ret;
}

static INLINE vdouble2 vsel2(vmask mask, vdouble2 x, vdouble2 y)
{
    return dd((__m128d)vorm(vandm(mask, (__m128i)x.x), vandnotm(mask, (__m128i)y.x)),
              (__m128d)vorm(vandm(mask, (__m128i)x.y), vandnotm(mask, (__m128i)y.y)));
}

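// abs_d takes the sign of the high part x.x and xors it into both components,
// so the sign flip is applied consistently to the whole double-double value.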
static INLINE vdouble2 abs_d(vdouble2 x)
{
    return dd((__m128d)_mm_xor_pd(_mm_and_pd(_mm_set_pd(-0.0, -0.0), x.x), x.x),
              (__m128d)_mm_xor_pd(_mm_and_pd(_mm_set_pd(-0.0, -0.0), x.x), x.y));
}
