/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2011, Richard Lowe
 */

#ifndef _FENV_INLINES_H
#define _FENV_INLINES_H

#ifdef __GNUC__

#ifdef __cplusplus
extern "C" {
#endif

#include <sys/types.h>

#ifndef __GNU_INLINE
#define __GNU_INLINE inline __attribute__((gnu_inline))
#endif

#if defined(__x86)

33 /*
34 * Floating point Control Word and Status Word
35 * Definition should actually be shared with x86
36 * (much of this 'amd64' code can be, in fact.)
37 */
38 union fp_cwsw {
39 uint32_t cwsw;
40 struct {
41 uint16_t cw;
42 uint16_t sw;
43 } words;
44 };
45
46 extern __GNU_INLINE void
__fenv_getcwsw(unsigned int * value)47 __fenv_getcwsw(unsigned int *value)
48 {
49 union fp_cwsw *u = (union fp_cwsw *)value;
50
51 __asm__ __volatile__(
52 "fstsw %0\n\t"
53 "fstcw %1\n\t"
54 : "=m" (u->words.cw), "=m" (u->words.sw));
55 }
56
57 extern __GNU_INLINE void
__fenv_setcwsw(const unsigned int * value)58 __fenv_setcwsw(const unsigned int *value)
59 {
60 union fp_cwsw cwsw;
61 short fenv[16];
62
63 cwsw.cwsw = *value;
64
65 __asm__ __volatile__(
66 "fstenv %0\n\t"
67 "movw %4,%1\n\t"
68 "movw %3,%2\n\t"
69 "fldenv %0\n\t"
70 "fwait\n\t"
71 : "=m" (fenv), "=m" (fenv[0]), "=m" (fenv[2])
72 : "r" (cwsw.words.cw), "r" (cwsw.words.sw)
73 /* For practical purposes, we clobber the whole FPU */
74 : "cc", "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)",
75 "st(6)", "st(7)");
76 }
77
78 extern __GNU_INLINE void
__fenv_getmxcsr(unsigned int * value)79 __fenv_getmxcsr(unsigned int *value)
80 {
81 __asm__ __volatile__("stmxcsr %0" : "=m" (*value));
82 }
83
84 extern __GNU_INLINE void
__fenv_setmxcsr(const unsigned int * value)85 __fenv_setmxcsr(const unsigned int *value)
86 {
87 __asm__ __volatile__("ldmxcsr %0" : : "m" (*value));
88 }
89
90 extern __GNU_INLINE long double
f2xm1(long double x)91 f2xm1(long double x)
92 {
93 long double ret;
94
95 __asm__ __volatile__("f2xm1" : "=t" (ret) : "0" (x) : "cc");
96 return (ret);
97 }
98
99 extern __GNU_INLINE long double
fyl2x(long double y,long double x)100 fyl2x(long double y, long double x)
101 {
102 long double ret;
103
104 __asm__ __volatile__("fyl2x"
105 : "=t" (ret)
106 : "0" (x), "u" (y)
107 : "st(1)", "cc");
108 return (ret);
109 }
110
111 extern __GNU_INLINE long double
fptan(long double x)112 fptan(long double x)
113 {
114 /*
115 * fptan pushes 1.0 then the result on completion, so we want to pop
116 * the FP stack twice, so we need a dummy value into which to pop it.
117 */
118 long double ret;
119 long double dummy;
120
121 __asm__ __volatile__("fptan"
122 : "=t" (dummy), "=u" (ret)
123 : "0" (x)
124 : "cc");
125 return (ret);
126 }
127
128 extern __GNU_INLINE long double
fpatan(long double x,long double y)129 fpatan(long double x, long double y)
130 {
131 long double ret;
132
133 __asm__ __volatile__("fpatan"
134 : "=t" (ret)
135 : "0" (y), "u" (x)
136 : "st(1)", "cc");
137 return (ret);
138 }
139
140 extern __GNU_INLINE long double
fxtract(long double x)141 fxtract(long double x)
142 {
143 __asm__ __volatile__("fxtract" : "+t" (x) : : "cc");
144 return (x);
145 }
146
147 extern __GNU_INLINE long double
fprem1(long double idend,long double div)148 fprem1(long double idend, long double div)
149 {
150 __asm__ __volatile__("fprem1" : "+t" (div) : "u" (idend) : "cc");
151 return (div);
152 }
153
154 extern __GNU_INLINE long double
fprem(long double idend,long double div)155 fprem(long double idend, long double div)
156 {
157 __asm__ __volatile__("fprem" : "+t" (div) : "u" (idend) : "cc");
158 return (div);
159 }
160
161 extern __GNU_INLINE long double
fyl2xp1(long double y,long double x)162 fyl2xp1(long double y, long double x)
163 {
164 long double ret;
165
166 __asm__ __volatile__("fyl2xp1"
167 : "=t" (ret)
168 : "0" (x), "u" (y)
169 : "st(1)", "cc");
170 return (ret);
171 }
172
173 extern __GNU_INLINE long double
fsqrt(long double x)174 fsqrt(long double x)
175 {
176 __asm__ __volatile__("fsqrt" : "+t" (x) : : "cc");
177 return (x);
178 }
179
180 extern __GNU_INLINE long double
fsincos(long double x)181 fsincos(long double x)
182 {
183 long double dummy;
184
185 __asm__ __volatile__("fsincos" : "+t" (x), "=u" (dummy) : : "cc");
186 return (x);
187 }
188
189 extern __GNU_INLINE long double
frndint(long double x)190 frndint(long double x)
191 {
192 __asm__ __volatile__("frndint" : "+t" (x) : : "cc");
193 return (x);
194 }
195
196 extern __GNU_INLINE long double
fscale(long double x,long double y)197 fscale(long double x, long double y)
198 {
199 long double ret;
200
201 __asm__ __volatile__("fscale" : "=t" (ret) : "0" (y), "u" (x) : "cc");
202 return (ret);
203 }
204
205 extern __GNU_INLINE long double
fsin(long double x)206 fsin(long double x)
207 {
208 __asm__ __volatile__("fsin" : "+t" (x) : : "cc");
209 return (x);
210 }
211
212 extern __GNU_INLINE long double
fcos(long double x)213 fcos(long double x)
214 {
215 __asm__ __volatile__("fcos" : "+t" (x) : : "cc");
216 return (x);
217 }
218
219 extern __GNU_INLINE void
sse_cmpeqss(float * f1,float * f2,int * i1)220 sse_cmpeqss(float *f1, float *f2, int *i1)
221 {
222 __asm__ __volatile__(
223 "cmpeqss %2, %1\n\t"
224 "movss %1, %0"
225 : "=m" (*i1), "+x" (*f1)
226 : "x" (*f2)
227 : "cc");
228 }
229
230 extern __GNU_INLINE void
sse_cmpltss(float * f1,float * f2,int * i1)231 sse_cmpltss(float *f1, float *f2, int *i1)
232 {
233 __asm__ __volatile__(
234 "cmpltss %2, %1\n\t"
235 "movss %1, %0"
236 : "=m" (*i1), "+x" (*f1)
237 : "x" (*f2)
238 : "cc");
239 }
240
241 extern __GNU_INLINE void
sse_cmpless(float * f1,float * f2,int * i1)242 sse_cmpless(float *f1, float *f2, int *i1)
243 {
244 __asm__ __volatile__(
245 "cmpless %2, %1\n\t"
246 "movss %1, %0"
247 : "=m" (*i1), "+x" (*f1)
248 : "x" (*f2)
249 : "cc");
250 }
251
252 extern __GNU_INLINE void
sse_cmpunordss(float * f1,float * f2,int * i1)253 sse_cmpunordss(float *f1, float *f2, int *i1)
254 {
255 __asm__ __volatile__(
256 "cmpunordss %2, %1\n\t"
257 "movss %1, %0"
258 : "=m" (*i1), "+x" (*f1)
259 : "x" (*f2)
260 : "cc");
261 }
262
263 extern __GNU_INLINE void
sse_minss(float * f1,float * f2,float * f3)264 sse_minss(float *f1, float *f2, float *f3)
265 {
266 __asm__ __volatile__(
267 "minss %2, %1\n\t"
268 "movss %1, %0"
269 : "=m" (*f3), "+x" (*f1)
270 : "x" (*f2));
271 }
272
273 extern __GNU_INLINE void
sse_maxss(float * f1,float * f2,float * f3)274 sse_maxss(float *f1, float *f2, float *f3)
275 {
276 __asm__ __volatile__(
277 "maxss %2, %1\n\t"
278 "movss %1, %0"
279 : "=m" (*f3), "+x" (*f1)
280 : "x" (*f2));
281 }
282
283 extern __GNU_INLINE void
sse_addss(float * f1,float * f2,float * f3)284 sse_addss(float *f1, float *f2, float *f3)
285 {
286 __asm__ __volatile__(
287 "addss %2, %1\n\t"
288 "movss %1, %0"
289 : "=m" (*f3), "+x" (*f1)
290 : "x" (*f2));
291 }
292
293 extern __GNU_INLINE void
sse_subss(float * f1,float * f2,float * f3)294 sse_subss(float *f1, float *f2, float *f3)
295 {
296 __asm__ __volatile__(
297 "subss %2, %1\n\t"
298 "movss %1, %0"
299 : "=m" (*f3), "+x" (*f1)
300 : "x" (*f2));
301 }
302
303 extern __GNU_INLINE void
sse_mulss(float * f1,float * f2,float * f3)304 sse_mulss(float *f1, float *f2, float *f3)
305 {
306 __asm__ __volatile__(
307 "mulss %2, %1\n\t"
308 "movss %1, %0"
309 : "=m" (*f3), "+x" (*f1)
310 : "x" (*f2));
311 }
312
313 extern __GNU_INLINE void
sse_divss(float * f1,float * f2,float * f3)314 sse_divss(float *f1, float *f2, float *f3)
315 {
316 __asm__ __volatile__(
317 "divss %2, %1\n\t"
318 "movss %1, %0"
319 : "=m" (*f3), "+x" (*f1)
320 : "x" (*f2));
321 }
322
323 extern __GNU_INLINE void
sse_sqrtss(float * f1,float * f2)324 sse_sqrtss(float *f1, float *f2)
325 {
326 double tmp;
327
328 __asm__ __volatile__(
329 "sqrtss %2, %1\n\t"
330 "movss %1, %0"
331 : "=m" (*f2), "=x" (tmp)
332 : "m" (*f1));
333 }
334
335 extern __GNU_INLINE void
sse_ucomiss(float * f1,float * f2)336 sse_ucomiss(float *f1, float *f2)
337 {
338 __asm__ __volatile__("ucomiss %1, %0" : : "x" (*f1), "x" (*f2));
339
340 }
341
342 extern __GNU_INLINE void
sse_comiss(float * f1,float * f2)343 sse_comiss(float *f1, float *f2)
344 {
345 __asm__ __volatile__("comiss %1, %0" : : "x" (*f1), "x" (*f2));
346 }
347
348 extern __GNU_INLINE void
sse_cvtss2sd(float * f1,double * d1)349 sse_cvtss2sd(float *f1, double *d1)
350 {
351 double tmp;
352
353 __asm__ __volatile__(
354 "cvtss2sd %2, %1\n\t"
355 "movsd %1, %0"
356 : "=m" (*d1), "=x" (tmp)
357 : "m" (*f1));
358 }
359
360 extern __GNU_INLINE void
sse_cvtsi2ss(int * i1,float * f1)361 sse_cvtsi2ss(int *i1, float *f1)
362 {
363 double tmp;
364
365 __asm__ __volatile__(
366 "cvtsi2ss %2, %1\n\t"
367 "movss %1, %0"
368 : "=m" (*f1), "=x" (tmp)
369 : "m" (*i1));
370 }
371
372 extern __GNU_INLINE void
sse_cvttss2si(float * f1,int * i1)373 sse_cvttss2si(float *f1, int *i1)
374 {
375 int tmp;
376
377 __asm__ __volatile__(
378 "cvttss2si %2, %1\n\t"
379 "movl %1, %0"
380 : "=m" (*i1), "=r" (tmp)
381 : "m" (*f1));
382 }
383
384 extern __GNU_INLINE void
sse_cvtss2si(float * f1,int * i1)385 sse_cvtss2si(float *f1, int *i1)
386 {
387 int tmp;
388
389 __asm__ __volatile__(
390 "cvtss2si %2, %1\n\t"
391 "movl %1, %0"
392 : "=m" (*i1), "=r" (tmp)
393 : "m" (*f1));
394 }
395
396 #if defined(__amd64)
397 extern __GNU_INLINE void
sse_cvtsi2ssq(long long * ll1,float * f1)398 sse_cvtsi2ssq(long long *ll1, float *f1)
399 {
400 double tmp;
401
402 __asm__ __volatile__(
403 "cvtsi2ssq %2, %1\n\t"
404 "movss %1, %0"
405 : "=m" (*f1), "=x" (tmp)
406 : "m" (*ll1));
407 }
408
409 extern __GNU_INLINE void
sse_cvttss2siq(float * f1,long long * ll1)410 sse_cvttss2siq(float *f1, long long *ll1)
411 {
412 uint64_t tmp;
413
414 __asm__ __volatile__(
415 "cvttss2siq %2, %1\n\t"
416 "movq %1, %0"
417 : "=m" (*ll1), "=r" (tmp)
418 : "m" (*f1));
419 }
420
421 extern __GNU_INLINE void
sse_cvtss2siq(float * f1,long long * ll1)422 sse_cvtss2siq(float *f1, long long *ll1)
423 {
424 uint64_t tmp;
425
426 __asm__ __volatile__(
427 "cvtss2siq %2, %1\n\t"
428 "movq %1, %0"
429 : "=m" (*ll1), "=r" (tmp)
430 : "m" (*f1));
431 }
432
433 #endif
434
435 extern __GNU_INLINE void
sse_cmpeqsd(double * d1,double * d2,long long * ll1)436 sse_cmpeqsd(double *d1, double *d2, long long *ll1)
437 {
438 __asm__ __volatile__(
439 "cmpeqsd %2,%1\n\t"
440 "movsd %1,%0"
441 : "=m" (*ll1), "+x" (*d1)
442 : "x" (*d2));
443 }
444
445 extern __GNU_INLINE void
sse_cmpltsd(double * d1,double * d2,long long * ll1)446 sse_cmpltsd(double *d1, double *d2, long long *ll1)
447 {
448 __asm__ __volatile__(
449 "cmpltsd %2,%1\n\t"
450 "movsd %1,%0"
451 : "=m" (*ll1), "+x" (*d1)
452 : "x" (*d2));
453 }
454
455 extern __GNU_INLINE void
sse_cmplesd(double * d1,double * d2,long long * ll1)456 sse_cmplesd(double *d1, double *d2, long long *ll1)
457 {
458 __asm__ __volatile__(
459 "cmplesd %2,%1\n\t"
460 "movsd %1,%0"
461 : "=m" (*ll1), "+x" (*d1)
462 : "x" (*d2));
463 }
464
465 extern __GNU_INLINE void
sse_cmpunordsd(double * d1,double * d2,long long * ll1)466 sse_cmpunordsd(double *d1, double *d2, long long *ll1)
467 {
468 __asm__ __volatile__(
469 "cmpunordsd %2,%1\n\t"
470 "movsd %1,%0"
471 : "=m" (*ll1), "+x" (*d1)
472 : "x" (*d2));
473 }
474
475
476 extern __GNU_INLINE void
sse_minsd(double * d1,double * d2,double * d3)477 sse_minsd(double *d1, double *d2, double *d3)
478 {
479 __asm__ __volatile__(
480 "minsd %2,%1\n\t"
481 "movsd %1,%0"
482 : "=m" (*d3), "+x" (*d1)
483 : "x" (*d2));
484 }
485
486 extern __GNU_INLINE void
sse_maxsd(double * d1,double * d2,double * d3)487 sse_maxsd(double *d1, double *d2, double *d3)
488 {
489 __asm__ __volatile__(
490 "maxsd %2,%1\n\t"
491 "movsd %1,%0"
492 : "=m" (*d3), "+x" (*d1)
493 : "x" (*d2));
494 }
495
496 extern __GNU_INLINE void
sse_addsd(double * d1,double * d2,double * d3)497 sse_addsd(double *d1, double *d2, double *d3)
498 {
499 __asm__ __volatile__(
500 "addsd %2,%1\n\t"
501 "movsd %1,%0"
502 : "=m" (*d3), "+x" (*d1)
503 : "x" (*d2));
504 }
505
506 extern __GNU_INLINE void
sse_subsd(double * d1,double * d2,double * d3)507 sse_subsd(double *d1, double *d2, double *d3)
508 {
509 __asm__ __volatile__(
510 "subsd %2,%1\n\t"
511 "movsd %1,%0"
512 : "=m" (*d3), "+x" (*d1)
513 : "x" (*d2));
514 }
515
516 extern __GNU_INLINE void
sse_mulsd(double * d1,double * d2,double * d3)517 sse_mulsd(double *d1, double *d2, double *d3)
518 {
519 __asm__ __volatile__(
520 "mulsd %2,%1\n\t"
521 "movsd %1,%0"
522 : "=m" (*d3), "+x" (*d1)
523 : "x" (*d2));
524 }
525
526 extern __GNU_INLINE void
sse_divsd(double * d1,double * d2,double * d3)527 sse_divsd(double *d1, double *d2, double *d3)
528 {
529 __asm__ __volatile__(
530 "divsd %2,%1\n\t"
531 "movsd %1,%0"
532 : "=m" (*d3), "+x" (*d1)
533 : "x" (*d2));
534 }
535
536 extern __GNU_INLINE void
sse_sqrtsd(double * d1,double * d2)537 sse_sqrtsd(double *d1, double *d2)
538 {
539 double tmp;
540
541 __asm__ __volatile__(
542 "sqrtsd %2, %1\n\t"
543 "movsd %1, %0"
544 : "=m" (*d2), "=x" (tmp)
545 : "m" (*d1));
546 }
547
548 extern __GNU_INLINE void
sse_ucomisd(double * d1,double * d2)549 sse_ucomisd(double *d1, double *d2)
550 {
551 __asm__ __volatile__("ucomisd %1, %0" : : "x" (*d1), "x" (*d2));
552 }
553
554 extern __GNU_INLINE void
sse_comisd(double * d1,double * d2)555 sse_comisd(double *d1, double *d2)
556 {
557 __asm__ __volatile__("comisd %1, %0" : : "x" (*d1), "x" (*d2));
558 }
559
560 extern __GNU_INLINE void
sse_cvtsd2ss(double * d1,float * f1)561 sse_cvtsd2ss(double *d1, float *f1)
562 {
563 double tmp;
564
565 __asm__ __volatile__(
566 "cvtsd2ss %2,%1\n\t"
567 "movss %1,%0"
568 : "=m" (*f1), "=x" (tmp)
569 : "m" (*d1));
570 }
571
572 extern __GNU_INLINE void
sse_cvtsi2sd(int * i1,double * d1)573 sse_cvtsi2sd(int *i1, double *d1)
574 {
575 double tmp;
576 __asm__ __volatile__(
577 "cvtsi2sd %2,%1\n\t"
578 "movsd %1,%0"
579 : "=m" (*d1), "=x" (tmp)
580 : "m" (*i1));
581 }
582
583 extern __GNU_INLINE void
sse_cvttsd2si(double * d1,int * i1)584 sse_cvttsd2si(double *d1, int *i1)
585 {
586 int tmp;
587
588 __asm__ __volatile__(
589 "cvttsd2si %2,%1\n\t"
590 "movl %1,%0"
591 : "=m" (*i1), "=r" (tmp)
592 : "m" (*d1));
593 }
594
595 extern __GNU_INLINE void
sse_cvtsd2si(double * d1,int * i1)596 sse_cvtsd2si(double *d1, int *i1)
597 {
598 int tmp;
599
600 __asm__ __volatile__(
601 "cvtsd2si %2,%1\n\t"
602 "movl %1,%0"
603 : "=m" (*i1), "=r" (tmp)
604 : "m" (*d1));
605 }
606
607 #if defined(__amd64)
608 extern __GNU_INLINE void
sse_cvtsi2sdq(long long * ll1,double * d1)609 sse_cvtsi2sdq(long long *ll1, double *d1)
610 {
611 double tmp;
612
613 __asm__ __volatile__(
614 "cvtsi2sdq %2,%1\n\t"
615 "movsd %1,%0"
616 : "=m" (*d1), "=x" (tmp)
617 : "m" (*ll1));
618 }
619
620 extern __GNU_INLINE void
sse_cvttsd2siq(double * d1,long long * ll1)621 sse_cvttsd2siq(double *d1, long long *ll1)
622 {
623 uint64_t tmp;
624
625 __asm__ __volatile__(
626 "cvttsd2siq %2,%1\n\t"
627 "movq %1,%0"
628 : "=m" (*ll1), "=r" (tmp)
629 : "m" (*d1));
630 }
631
632 extern __GNU_INLINE void
sse_cvtsd2siq(double * d1,long long * ll1)633 sse_cvtsd2siq(double *d1, long long *ll1)
634 {
635 uint64_t tmp;
636
637 __asm__ __volatile__(
638 "cvtsd2siq %2,%1\n\t"
639 "movq %1,%0"
640 : "=m" (*ll1), "=r" (tmp)
641 : "m" (*d1));
642 }
643 #endif
644
645 #elif defined(__sparc)
646 extern __GNU_INLINE void
647 __fenv_getfsr(unsigned long *l)
648 {
649 __asm__ __volatile__(
650 #if defined(__sparcv9)
651 "stx %%fsr,%0\n\t"
652 #else
653 "st %%fsr,%0\n\t"
654 #endif
655 : "=m" (*l));
656 }
657
658 extern __GNU_INLINE void
659 __fenv_setfsr(const unsigned long *l)
660 {
661 __asm__ __volatile__(
662 #if defined(__sparcv9)
663 "ldx %0,%%fsr\n\t"
664 #else
665 "ld %0,%%fsr\n\t"
666 #endif
667 : : "m" (*l) : "cc");
668 }
669
670 extern __GNU_INLINE void
671 __fenv_getfsr32(unsigned int *l)
672 {
673 __asm__ __volatile__("st %%fsr,%0\n\t" : "=m" (*l));
674 }
675
676 extern __GNU_INLINE void
677 __fenv_setfsr32(const unsigned int *l)
678 {
679 __asm__ __volatile__("ld %0,%%fsr\n\t" : : "m" (*l));
680 }
681 #else
682 #error "GCC FENV inlines not implemented for this platform"
683 #endif
684
685 #ifdef __cplusplus
686 }
687 #endif
688
689 #endif /* __GNUC__ */
690
691 #endif /* _FENV_INLINES_H */
692