1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2011, Richard Lowe
14  */
15 
16 #ifndef _FENV_INLINES_H
17 #define	_FENV_INLINES_H
18 
19 #ifdef __GNUC__
20 
21 #ifdef __cplusplus
22 extern "C" {
23 #endif
24 
25 #include <sys/types.h>
26 
27 #ifndef __GNU_INLINE
28 #define __GNU_INLINE inline __attribute__((gnu_inline))
29 #endif
30 
31 #if defined(__x86)
32 
33 /*
34  * Floating point Control Word and Status Word
35  * Definition should actually be shared with x86
36  * (much of this 'amd64' code can be, in fact.)
37  */
/*
 * Overlay of the x87 Control Word and Status Word as a single 32-bit
 * value.
 *
 * NOTE(review): on little-endian x86 the `cw' member overlays the LOW
 * 16 bits of `cwsw'.  __fenv_getcwsw() stores the *status* word into
 * `cw' and the *control* word into `sw', and __fenv_setcwsw() reads
 * them back the same way, so the two are mutually consistent and the
 * packed value is evidently (control-word << 16) | status-word.  The
 * member names therefore appear swapped relative to their contents --
 * confirm against all callers before renaming anything.
 */
union fp_cwsw {
	uint32_t cwsw;
	struct {
		uint16_t cw;
		uint16_t sw;
	} words;
};
45 
/*
 * Read the x87 control and status words into *value as one 32-bit
 * quantity.  fstsw (status word) targets %0 = u->words.cw, the low
 * half, and fstcw (control word) targets %1 = u->words.sw, the high
 * half, so *value ends up as (control << 16) | status.
 *
 * NOTE(review): the operand-to-field pairing looks inverted relative
 * to the field names, but it matches what __fenv_setcwsw() consumes;
 * verify callers before "fixing" it.
 */
extern __GNU_INLINE void
__fenv_getcwsw(unsigned int *value)
{
	union fp_cwsw *u = (union fp_cwsw *)value;

	__asm__ __volatile__(
	    "fstsw %0\n\t"
	    "fstcw %1\n\t"
	    : "=m" (u->words.cw), "=m" (u->words.sw));
}
56 
/*
 * Load new x87 control and status words from *value (packed as
 * (control << 16) | status, the same layout __fenv_getcwsw()
 * produces).
 *
 * There is no instruction that loads the status word directly, so the
 * x87 environment (28 bytes; fenv[] is 32, comfortably enough) is
 * dumped with fstenv, its CW slot (16-bit word 0, fenv[0]) and SW
 * slot (word 2, fenv[2]) are patched, and the image is reloaded with
 * fldenv.
 *
 * NOTE(review): %4 (words.sw) is moved into the CW slot and %3
 * (words.cw) into the SW slot -- consistent with the swapped field
 * naming noted at __fenv_getcwsw(); confirm before changing.
 */
extern __GNU_INLINE void
__fenv_setcwsw(const unsigned int *value)
{
	union fp_cwsw cwsw;
	short fenv[16];

	cwsw.cwsw = *value;

	__asm__ __volatile__(
	    "fstenv %0\n\t"
	    "movw   %4,%1\n\t"
	    "movw   %3,%2\n\t"
	    "fldenv %0\n\t"
	    "fwait\n\t"
	    : "=m" (fenv), "=m" (fenv[0]), "=m" (fenv[2])
	    : "r" (cwsw.words.cw), "r" (cwsw.words.sw)
	    /* For practical purposes, we clobber the whole FPU */
	    : "cc", "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)",
	      "st(6)", "st(7)");
}
77 
/* Read the SSE MXCSR control/status register into *value. */
extern __GNU_INLINE void
__fenv_getmxcsr(unsigned int *value)
{
	__asm__ __volatile__("stmxcsr %0" : "=m" (*value));
}

/* Load the SSE MXCSR control/status register from *value. */
extern __GNU_INLINE void
__fenv_setmxcsr(const unsigned int *value)
{
	__asm__ __volatile__("ldmxcsr %0" : : "m" (*value));
}
89 
/*
 * The inlines below each wrap a single x87 instruction, pinning
 * operands to stack slots via the "t" (st(0)) and "u" (st(1))
 * constraints.  Comments note operand placement; instruction
 * semantics are per the Intel SDM.
 */

/* Return 2^x - 1 (F2XM1, x in st(0)). */
extern __GNU_INLINE long double
f2xm1(long double x)
{
	long double ret;

	__asm__ __volatile__("f2xm1" : "=t" (ret) : "0" (x) : "cc");
	return (ret);
}

/*
 * Return y * log2(x).  FYL2X computes st(1) * log2(st(0)) and pops,
 * hence x in st(0) ("0"), y in st(1) ("u"), and the st(1) clobber.
 */
extern __GNU_INLINE long double
fyl2x(long double y, long double x)
{
	long double ret;

	__asm__ __volatile__("fyl2x"
	    : "=t" (ret)
	    : "0" (x), "u" (y)
	    : "st(1)", "cc");
	return (ret);
}

/* Return tan(x). */
extern __GNU_INLINE long double
fptan(long double x)
{
	/*
	 * FPTAN replaces st(0) with the tangent and then pushes 1.0,
	 * leaving 1.0 in st(0) and the result in st(1).  Both slots
	 * must be consumed, so the 1.0 is absorbed by a dummy output
	 * while the real result is taken from st(1).
	 */
	long double ret;
	long double dummy;

	__asm__ __volatile__("fptan"
	    : "=t" (dummy), "=u" (ret)
	    : "0" (x)
	    : "cc");
	return (ret);
}

/*
 * Return arctan(x / y).  FPATAN computes arctan(st(1)/st(0)) and
 * pops, hence y in st(0) ("0") and x in st(1) ("u").
 */
extern __GNU_INLINE long double
fpatan(long double x, long double y)
{
	long double ret;

	__asm__ __volatile__("fpatan"
	    : "=t" (ret)
	    : "0" (y), "u" (x)
	    : "st(1)", "cc");
	return (ret);
}

/*
 * Return the significand of x.  FXTRACT leaves the significand in
 * st(0) and also pushes the exponent (left in st(1)).
 *
 * NOTE(review): the pushed exponent is not declared as an output or
 * clobber here, so it appears to be left on the x87 stack -- confirm
 * whether this is a latent stack-imbalance issue.
 */
extern __GNU_INLINE long double
fxtract(long double x)
{
	__asm__ __volatile__("fxtract" : "+t" (x) : : "cc");
	return (x);
}
146 
/*
 * IEEE partial remainder: FPREM1 computes st(0) rem st(1), here with
 * `div' in st(0) and `idend' in st(1).
 *
 * NOTE(review): the parameter names suggest dividend/divisor roles
 * that read opposite to the operand placement; the asm is as written
 * -- verify against callers before renaming.
 */
extern __GNU_INLINE long double
fprem1(long double idend, long double div)
{
	__asm__ __volatile__("fprem1" : "+t" (div) : "u" (idend) : "cc");
	return (div);
}

/* Truncating partial remainder: FPREM, same operand placement as fprem1. */
extern __GNU_INLINE long double
fprem(long double idend, long double div)
{
	__asm__ __volatile__("fprem" : "+t" (div) : "u" (idend) : "cc");
	return (div);
}

/*
 * Return y * log2(x + 1).  FYL2XP1 computes st(1) * log2(st(0) + 1)
 * and pops, hence x in st(0), y in st(1).
 */
extern __GNU_INLINE long double
fyl2xp1(long double y, long double x)
{
	long double ret;

	__asm__ __volatile__("fyl2xp1"
	    : "=t" (ret)
	    : "0" (x), "u" (y)
	    : "st(1)", "cc");
	return (ret);
}

/* Return sqrt(x). */
extern __GNU_INLINE long double
fsqrt(long double x)
{
	__asm__ __volatile__("fsqrt" : "+t" (x) : : "cc");
	return (x);
}

/*
 * FSINCOS leaves the sine in st(1) and the cosine on top in st(0),
 * so the "+t" output returns the cosine and the sine is discarded
 * into dummy.
 *
 * NOTE(review): despite the name, this returns cos(x) -- confirm
 * that callers expect exactly that.
 */
extern __GNU_INLINE long double
fsincos(long double x)
{
	long double dummy;

	__asm__ __volatile__("fsincos" : "+t" (x), "=u" (dummy) : : "cc");
	return (x);
}

/* Round x to an integer per the current x87 rounding mode (FRNDINT). */
extern __GNU_INLINE long double
frndint(long double x)
{
	__asm__ __volatile__("frndint" : "+t" (x) : : "cc");
	return (x);
}

/*
 * Return y * 2^trunc(x).  FSCALE scales st(0) by st(1), hence y in
 * st(0) ("0") and x in st(1) ("u").
 */
extern __GNU_INLINE long double
fscale(long double x, long double y)
{
	long double ret;

	__asm__ __volatile__("fscale" : "=t" (ret) : "0" (y), "u" (x) : "cc");
	return (ret);
}

/* Return sin(x). */
extern __GNU_INLINE long double
fsin(long double x)
{
	__asm__ __volatile__("fsin" : "+t" (x) : : "cc");
	return (x);
}

/* Return cos(x). */
extern __GNU_INLINE long double
fcos(long double x)
{
	__asm__ __volatile__("fcos" : "+t" (x) : : "cc");
	return (x);
}
218 
/*
 * Scalar single-precision SSE compares.  Each stores the 32-bit
 * all-ones/all-zeroes result mask to *i1.
 *
 * NOTE(review): the "+x" (*f1) read-write operand means the compiler
 * writes the mask back to *f1 as well, clobbering it -- presumably
 * callers treat *f1 as scratch; confirm before relying on its value.
 */

/* *i1 = mask of (*f1 == *f2). */
extern __GNU_INLINE void
sse_cmpeqss(float *f1, float *f2, int *i1)
{
	__asm__ __volatile__(
	    "cmpeqss %2, %1\n\t"
	    "movss   %1, %0"
	    : "=m" (*i1), "+x" (*f1)
	    : "x" (*f2)
	    : "cc");
}

/* *i1 = mask of (*f1 < *f2). */
extern __GNU_INLINE void
sse_cmpltss(float *f1, float *f2, int *i1)
{
	__asm__ __volatile__(
	    "cmpltss %2, %1\n\t"
	    "movss   %1, %0"
	    : "=m" (*i1), "+x" (*f1)
	    : "x" (*f2)
	    : "cc");
}

/* *i1 = mask of (*f1 <= *f2). */
extern __GNU_INLINE void
sse_cmpless(float *f1, float *f2, int *i1)
{
	__asm__ __volatile__(
	    "cmpless %2, %1\n\t"
	    "movss   %1, %0"
	    : "=m" (*i1), "+x" (*f1)
	    : "x" (*f2)
	    : "cc");
}

/* *i1 = mask of (*f1 or *f2 is NaN). */
extern __GNU_INLINE void
sse_cmpunordss(float *f1, float *f2, int *i1)
{
	__asm__ __volatile__(
	    "cmpunordss %2, %1\n\t"
	    "movss      %1, %0"
	    : "=m" (*i1), "+x" (*f1)
	    : "x" (*f2)
	    : "cc");
}
262 
/*
 * Scalar single-precision SSE arithmetic: *f3 = op(*f1, *f2).
 *
 * NOTE(review): as with the compares above, "+x" (*f1) causes the
 * result to be written back to *f1 too -- confirm callers treat *f1
 * as scratch.
 */

/* *f3 = min(*f1, *f2) (MINSS semantics for NaN/equal operands). */
extern __GNU_INLINE void
sse_minss(float *f1, float *f2, float *f3)
{
	__asm__ __volatile__(
	    "minss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3), "+x" (*f1)
	    : "x" (*f2));
}

/* *f3 = max(*f1, *f2) (MAXSS semantics for NaN/equal operands). */
extern __GNU_INLINE void
sse_maxss(float *f1, float *f2, float *f3)
{
	__asm__ __volatile__(
	    "maxss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3), "+x" (*f1)
	    : "x" (*f2));
}

/* *f3 = *f1 + *f2. */
extern __GNU_INLINE void
sse_addss(float *f1, float *f2, float *f3)
{
	__asm__ __volatile__(
	    "addss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3), "+x" (*f1)
	    : "x" (*f2));
}

/* *f3 = *f1 - *f2. */
extern __GNU_INLINE void
sse_subss(float *f1, float *f2, float *f3)
{
	__asm__ __volatile__(
	    "subss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3), "+x" (*f1)
	    : "x" (*f2));
}

/* *f3 = *f1 * *f2. */
extern __GNU_INLINE void
sse_mulss(float *f1, float *f2, float *f3)
{
	__asm__ __volatile__(
	    "mulss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3), "+x" (*f1)
	    : "x" (*f2));
}

/* *f3 = *f1 / *f2. */
extern __GNU_INLINE void
sse_divss(float *f1, float *f2, float *f3)
{
	__asm__ __volatile__(
	    "divss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3), "+x" (*f1)
	    : "x" (*f2));
}
322 
/* *f2 = sqrt(*f1), staged through a scratch xmm register (tmp). */
extern __GNU_INLINE void
sse_sqrtss(float *f1, float *f2)
{
	double tmp;

	__asm__ __volatile__(
	    "sqrtss %2, %1\n\t"
	    "movss  %1, %0"
	    : "=m" (*f2), "=x" (tmp)
	    : "m" (*f1));
}

/*
 * Unordered compare of *f1 and *f2.  The EFLAGS results are not
 * captured; presumably the call exists for its FP-exception side
 * effect (UCOMISS signals invalid only on SNaN) -- verify intent.
 */
extern __GNU_INLINE void
sse_ucomiss(float *f1, float *f2)
{
	__asm__ __volatile__("ucomiss %1, %0" : : "x" (*f1), "x" (*f2));

}

/*
 * Ordered compare of *f1 and *f2; like sse_ucomiss() but COMISS also
 * signals invalid on QNaN operands.  Flag results are unused.
 */
extern __GNU_INLINE void
sse_comiss(float *f1, float *f2)
{
	__asm__ __volatile__("comiss %1, %0" : : "x" (*f1), "x" (*f2));
}
347 
/* *d1 = (double)*f1 (exact widening conversion). */
extern __GNU_INLINE void
sse_cvtss2sd(float *f1, double *d1)
{
	double tmp;

	__asm__ __volatile__(
	    "cvtss2sd %2, %1\n\t"
	    "movsd    %1, %0"
	    : "=m" (*d1), "=x" (tmp)
	    : "m" (*f1));
}

/* *f1 = (float)*i1, rounded per MXCSR. */
extern __GNU_INLINE void
sse_cvtsi2ss(int *i1, float *f1)
{
	double tmp;

	__asm__ __volatile__(
	    "cvtsi2ss %2, %1\n\t"
	    "movss    %1, %0"
	    : "=m" (*f1), "=x" (tmp)
	    : "m" (*i1));
}

/* *i1 = (int)*f1, truncated toward zero (cvtTss2si). */
extern __GNU_INLINE void
sse_cvttss2si(float *f1, int *i1)
{
	int tmp;

	__asm__ __volatile__(
	    "cvttss2si %2, %1\n\t"
	    "movl      %1, %0"
	    : "=m" (*i1), "=r" (tmp)
	    : "m" (*f1));
}

/* *i1 = (int)*f1, rounded per the current MXCSR rounding mode. */
extern __GNU_INLINE void
sse_cvtss2si(float *f1, int *i1)
{
	int tmp;

	__asm__ __volatile__(
	    "cvtss2si %2, %1\n\t"
	    "movl     %1, %0"
	    : "=m" (*i1), "=r" (tmp)
	    : "m" (*f1));
}
395 
396 #if defined(__amd64)
/* 64-bit variants, amd64 only. */

/* *f1 = (float)*ll1, rounded per MXCSR. */
extern __GNU_INLINE void
sse_cvtsi2ssq(long long *ll1, float *f1)
{
	double tmp;

	__asm__ __volatile__(
	    "cvtsi2ssq %2, %1\n\t"
	    "movss     %1, %0"
	    : "=m" (*f1), "=x" (tmp)
	    : "m" (*ll1));
}

/* *ll1 = (long long)*f1, truncated toward zero. */
extern __GNU_INLINE void
sse_cvttss2siq(float *f1, long long *ll1)
{
	uint64_t tmp;

	__asm__ __volatile__(
	    "cvttss2siq %2, %1\n\t"
	    "movq       %1, %0"
	    : "=m" (*ll1), "=r" (tmp)
	    : "m" (*f1));
}

/* *ll1 = (long long)*f1, rounded per the current MXCSR rounding mode. */
extern __GNU_INLINE void
sse_cvtss2siq(float *f1, long long *ll1)
{
	uint64_t tmp;

	__asm__ __volatile__(
	    "cvtss2siq %2, %1\n\t"
	    "movq      %1, %0"
	    : "=m" (*ll1), "=r" (tmp)
	    : "m" (*f1));
}
432 
433 #endif
434 
/*
 * Scalar double-precision SSE compares.  Each stores the 64-bit
 * all-ones/all-zeroes result mask to *ll1.
 *
 * NOTE(review): "+x" (*d1) means the mask is also written back to
 * *d1 -- presumably callers treat *d1 as scratch; confirm.
 */

/* *ll1 = mask of (*d1 == *d2). */
extern __GNU_INLINE void
sse_cmpeqsd(double *d1, double *d2, long long *ll1)
{
	__asm__ __volatile__(
	    "cmpeqsd %2,%1\n\t"
	    "movsd   %1,%0"
	    : "=m" (*ll1), "+x" (*d1)
	    : "x" (*d2));
}

/* *ll1 = mask of (*d1 < *d2). */
extern __GNU_INLINE void
sse_cmpltsd(double *d1, double *d2, long long *ll1)
{
	__asm__ __volatile__(
	    "cmpltsd %2,%1\n\t"
	    "movsd   %1,%0"
	    : "=m" (*ll1), "+x" (*d1)
	    : "x" (*d2));
}

/* *ll1 = mask of (*d1 <= *d2). */
extern __GNU_INLINE void
sse_cmplesd(double *d1, double *d2, long long *ll1)
{
	__asm__ __volatile__(
	    "cmplesd %2,%1\n\t"
	    "movsd   %1,%0"
	    : "=m" (*ll1), "+x" (*d1)
	    : "x" (*d2));
}

/* *ll1 = mask of (*d1 or *d2 is NaN). */
extern __GNU_INLINE void
sse_cmpunordsd(double *d1, double *d2, long long *ll1)
{
	__asm__ __volatile__(
	    "cmpunordsd %2,%1\n\t"
	    "movsd      %1,%0"
	    : "=m" (*ll1), "+x" (*d1)
	    : "x" (*d2));
}
474 
475 
/*
 * Scalar double-precision SSE arithmetic: *d3 = op(*d1, *d2).
 *
 * NOTE(review): "+x" (*d1) also writes the result back to *d1 --
 * confirm callers treat *d1 as scratch.
 */

/* *d3 = min(*d1, *d2) (MINSD semantics for NaN/equal operands). */
extern __GNU_INLINE void
sse_minsd(double *d1, double *d2, double *d3)
{
	__asm__ __volatile__(
	    "minsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "+x" (*d1)
	    : "x" (*d2));
}

/* *d3 = max(*d1, *d2) (MAXSD semantics for NaN/equal operands). */
extern __GNU_INLINE void
sse_maxsd(double *d1, double *d2, double *d3)
{
	__asm__ __volatile__(
	    "maxsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "+x" (*d1)
	    : "x" (*d2));
}

/* *d3 = *d1 + *d2. */
extern __GNU_INLINE void
sse_addsd(double *d1, double *d2, double *d3)
{
	__asm__ __volatile__(
	    "addsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "+x" (*d1)
	    : "x" (*d2));
}

/* *d3 = *d1 - *d2. */
extern __GNU_INLINE void
sse_subsd(double *d1, double *d2, double *d3)
{
	__asm__ __volatile__(
	    "subsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "+x" (*d1)
	    : "x" (*d2));
}

/* *d3 = *d1 * *d2. */
extern __GNU_INLINE void
sse_mulsd(double *d1, double *d2, double *d3)
{
	__asm__ __volatile__(
	    "mulsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "+x" (*d1)
	    : "x" (*d2));
}

/* *d3 = *d1 / *d2. */
extern __GNU_INLINE void
sse_divsd(double *d1, double *d2, double *d3)
{
	__asm__ __volatile__(
	    "divsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "+x" (*d1)
	    : "x" (*d2));
}
535 
/* *d2 = sqrt(*d1), staged through a scratch xmm register (tmp). */
extern __GNU_INLINE void
sse_sqrtsd(double *d1, double *d2)
{
	double tmp;

	__asm__ __volatile__(
	    "sqrtsd %2, %1\n\t"
	    "movsd %1, %0"
	    : "=m" (*d2), "=x" (tmp)
	    : "m" (*d1));
}

/*
 * Unordered compare of *d1 and *d2.  Flag results are not captured;
 * presumably executed for its FP-exception side effect (UCOMISD
 * signals invalid only on SNaN) -- verify intent.
 */
extern __GNU_INLINE void
sse_ucomisd(double *d1, double *d2)
{
	__asm__ __volatile__("ucomisd %1, %0" : : "x" (*d1), "x" (*d2));
}

/*
 * Ordered compare of *d1 and *d2; like sse_ucomisd() but COMISD also
 * signals invalid on QNaN operands.  Flag results are unused.
 */
extern __GNU_INLINE void
sse_comisd(double *d1, double *d2)
{
	__asm__ __volatile__("comisd %1, %0" : : "x" (*d1), "x" (*d2));
}
559 
/* *f1 = (float)*d1, narrowed/rounded per MXCSR. */
extern __GNU_INLINE void
sse_cvtsd2ss(double *d1, float *f1)
{
	double tmp;

	__asm__ __volatile__(
	    "cvtsd2ss %2,%1\n\t"
	    "movss    %1,%0"
	    : "=m" (*f1), "=x" (tmp)
	    : "m" (*d1));
}

/* *d1 = (double)*i1 (exact for any 32-bit int). */
extern __GNU_INLINE void
sse_cvtsi2sd(int *i1, double *d1)
{
	double tmp;
	__asm__ __volatile__(
	    "cvtsi2sd %2,%1\n\t"
	    "movsd    %1,%0"
	    : "=m" (*d1), "=x" (tmp)
	    : "m" (*i1));
}

/* *i1 = (int)*d1, truncated toward zero (cvtTsd2si). */
extern __GNU_INLINE void
sse_cvttsd2si(double *d1, int *i1)
{
	int tmp;

	__asm__ __volatile__(
	    "cvttsd2si %2,%1\n\t"
	    "movl      %1,%0"
	    : "=m" (*i1), "=r" (tmp)
	    : "m" (*d1));
}

/* *i1 = (int)*d1, rounded per the current MXCSR rounding mode. */
extern __GNU_INLINE void
sse_cvtsd2si(double *d1, int *i1)
{
	int tmp;

	__asm__ __volatile__(
	    "cvtsd2si %2,%1\n\t"
	    "movl     %1,%0"
	    : "=m" (*i1), "=r" (tmp)
	    : "m" (*d1));
}
606 
607 #if defined(__amd64)
/* 64-bit variants, amd64 only. */

/* *d1 = (double)*ll1, rounded per MXCSR. */
extern __GNU_INLINE void
sse_cvtsi2sdq(long long *ll1, double *d1)
{
	double tmp;

	__asm__ __volatile__(
	    "cvtsi2sdq %2,%1\n\t"
	    "movsd     %1,%0"
	    : "=m" (*d1), "=x" (tmp)
	    : "m" (*ll1));
}

/* *ll1 = (long long)*d1, truncated toward zero. */
extern __GNU_INLINE void
sse_cvttsd2siq(double *d1, long long *ll1)
{
	uint64_t tmp;

	__asm__ __volatile__(
	    "cvttsd2siq %2,%1\n\t"
	    "movq       %1,%0"
	    : "=m" (*ll1), "=r" (tmp)
	    : "m" (*d1));
}

/* *ll1 = (long long)*d1, rounded per the current MXCSR rounding mode. */
extern __GNU_INLINE void
sse_cvtsd2siq(double *d1, long long *ll1)
{
	uint64_t tmp;

	__asm__ __volatile__(
	    "cvtsd2siq %2,%1\n\t"
	    "movq      %1,%0"
	    : "=m" (*ll1), "=r" (tmp)
	    : "m" (*d1));
}
643 #endif
644 
645 #elif defined(__sparc)
/*
 * Read the SPARC %fsr into *l: a 64-bit stx on sparcv9 (where
 * unsigned long is 64-bit), a 32-bit st otherwise.
 */
extern __GNU_INLINE void
__fenv_getfsr(unsigned long *l)
{
	__asm__ __volatile__(
#if defined(__sparcv9)
	    "stx %%fsr,%0\n\t"
#else
	    "st  %%fsr,%0\n\t"
#endif
	    : "=m" (*l));
}

/*
 * Load the SPARC %fsr from *l (ldx on sparcv9, ld otherwise).  "cc"
 * is clobbered since the FSR contains the fcc condition-code fields.
 */
extern __GNU_INLINE void
__fenv_setfsr(const unsigned long *l)
{
	__asm__ __volatile__(
#if defined(__sparcv9)
	    "ldx %0,%%fsr\n\t"
#else
	    "ld %0,%%fsr\n\t"
#endif
	    : : "m" (*l) : "cc");
}

/* Read only the lower 32 bits of %fsr into *l. */
extern __GNU_INLINE void
__fenv_getfsr32(unsigned int *l)
{
	__asm__ __volatile__("st %%fsr,%0\n\t" : "=m" (*l));
}

/* Load the lower 32 bits of %fsr from *l. */
extern __GNU_INLINE void
__fenv_setfsr32(const unsigned int *l)
{
	__asm__ __volatile__("ld %0,%%fsr\n\t" : : "m" (*l));
}
681 #else
682 #error "GCC FENV inlines not implemented for this platform"
683 #endif
684 
685 #ifdef __cplusplus
686 }
687 #endif
688 
689 #endif  /* __GNUC__ */
690 
691 #endif /* _FENV_INLINES_H */
692