1 /* Copyright (C) 2005 The Scalable Software Infrastructure Project. All rights reserved.
2 
3    Redistribution and use in source and binary forms, with or without
4    modification, are permitted provided that the following conditions are met:
5    1. Redistributions of source code must retain the above copyright
6       notice, this list of conditions and the following disclaimer.
7    2. Redistributions in binary form must reproduce the above copyright
8       notice, this list of conditions and the following disclaimer in the
9       documentation and/or other materials provided with the distribution.
10    3. Neither the name of the project nor the names of its contributors
11       may be used to endorse or promote products derived from this software
12       without specific prior written permission.
13 
14    THIS SOFTWARE IS PROVIDED BY THE SCALABLE SOFTWARE INFRASTRUCTURE PROJECT
15    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
16    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE SCALABLE SOFTWARE INFRASTRUCTURE
18    PROJECT BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
19    OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24    POSSIBILITY OF SUCH DAMAGE.
25 */
26 
27 #ifdef HAVE_CONFIG_H
28 	#include "lis_config.h"
29 #else
30 #ifdef HAVE_CONFIG_WIN_H
31 	#include "lis_config_win.h"
32 #endif
33 #endif
34 
35 #include <stdio.h>
36 #include <string.h>
37 #include <math.h>
38 #ifdef USE_QUAD_PRECISION
39 #ifdef _WIN32
40 	#include <float.h>
41 #endif
42 #endif
43 #ifdef USE_SSE2
44 	#include <emmintrin.h>
45 #endif
46 #ifdef _OPENMP
47 	#include <omp.h>
48 #endif
49 #ifdef USE_MPI
50 	#include <mpi.h>
51 #endif
52 #include "lislib.h"
53 
54 #ifdef USE_QUAD_PRECISION
55 #undef __FUNC__
56 #define __FUNC__ "lis_quad_x87_fpu_init"
lis_quad_x87_fpu_init(LIS_UNSIGNED_INT * cw_old)57 void lis_quad_x87_fpu_init(LIS_UNSIGNED_INT *cw_old)
58 {
59 #ifdef HAS_X87_FPU
60 #ifdef _WIN32
61 #ifndef _WIN64
62 	LIS_UNSIGNED_INT cw = _control87(0, 0);
63 	_control87(0x00010000, 0x00030000);
64 	*cw_old = cw;
65 	cw = _control87(0, 0);
66 #endif
67 #else
68 	LIS_INT cw,cw_new;
69 	asm volatile ("fnstcw %0":"=m" (cw));
70 	cw_new = (cw & ~0x0300) | 0x0200;
71 	asm volatile ("fldcw %0": :"m" (cw_new));
72 	*cw_old = cw;
73 #endif
74 #else
75 	*cw_old = 0;
76 #endif
77 }
78 
79 #undef __FUNC__
80 #define __FUNC__ "lis_quad_x87_fpu_finalize"
lis_quad_x87_fpu_finalize(LIS_UNSIGNED_INT cw)81 void lis_quad_x87_fpu_finalize(LIS_UNSIGNED_INT cw)
82 {
83 #ifdef HAS_X87_FPU
84 #ifdef _WIN32
85 #ifndef _WIN64
86     _control87(cw, 0xFFFFFFFF);
87 #endif
88 #else
89 	asm volatile ("fldcw %0": :"m" (cw));
90 #endif
91 #endif
92 }
93 
94 #undef __FUNC__
95 #define __FUNC__ "lis_quad_minus"
lis_quad_minus(LIS_QUAD * a)96 void  lis_quad_minus(LIS_QUAD *a)
97 {
98 	a->hi = -a->hi;
99 	a->lo = -a->lo;
100 }
101 
102 #undef __FUNC__
103 #define __FUNC__ "lis_quad_zero"
lis_quad_zero(LIS_QUAD * a)104 void  lis_quad_zero(LIS_QUAD *a)
105 {
106 	a->hi = 0.0;
107 	a->lo = 0.0;
108 }
109 
110 #undef __FUNC__
111 #define __FUNC__ "lis_quad_one"
lis_quad_one(LIS_QUAD * a)112 void  lis_quad_one(LIS_QUAD *a)
113 {
114 	a->hi = 1.0;
115 	a->lo = 0.0;
116 }
117 
118 #undef __FUNC__
119 #define __FUNC__ "lis_quad_max"
lis_quad_max(LIS_QUAD * a,LIS_QUAD * b,LIS_QUAD * c)120 void  lis_quad_max(LIS_QUAD *a, LIS_QUAD *b, LIS_QUAD *c)
121 {
122 	if( (a->hi > b->hi) || ((a->hi==b->hi) && (a->lo > b->lo)) )
123 	{
124 		c->hi = a->hi;
125 		c->lo = a->lo;
126 	}
127 	else
128 	{
129 		c->hi = b->hi;
130 		c->lo = b->lo;
131 	}
132 }
133 
134 #undef __FUNC__
135 #define __FUNC__ "lis_quad_min"
lis_quad_min(LIS_QUAD * a,LIS_QUAD * b,LIS_QUAD * c)136 void  lis_quad_min(LIS_QUAD *a, LIS_QUAD *b, LIS_QUAD *c)
137 {
138 	if( (a->hi < b->hi) || ((a->hi==b->hi) && (a->lo < b->lo)) )
139 	{
140 		c->hi = a->hi;
141 		c->lo = a->lo;
142 	}
143 	else
144 	{
145 		c->hi = b->hi;
146 		c->lo = b->lo;
147 	}
148 }
149 
150 
151 
152 #undef __FUNC__
153 #define __FUNC__ "lis_quad_add"
lis_quad_add(LIS_QUAD * a,const LIS_QUAD * b,const LIS_QUAD * c)154 void lis_quad_add(LIS_QUAD *a, const LIS_QUAD *b, const LIS_QUAD *c)
155 {
156 	LIS_QUAD_DECLAR;
157 
158 	#ifndef USE_SSE2
159 		LIS_QUAD_ADD(a->hi,a->lo,b->hi,b->lo,c->hi,c->lo);
160 	#else
161 		LIS_QUAD_ADD_SSE2(a->hi,a->lo,b->hi,b->lo,c->hi,c->lo);
162 	#endif
163 }
164 
165 #undef __FUNC__
166 #define __FUNC__ "lis_quad_sub"
lis_quad_sub(LIS_QUAD * a,const LIS_QUAD * b,const LIS_QUAD * c)167 void lis_quad_sub(LIS_QUAD *a, const LIS_QUAD *b, const LIS_QUAD *c)
168 {
169 	LIS_QUAD_DECLAR;
170 
171 	#ifndef USE_SSE2
172 		LIS_QUAD_ADD(a->hi,a->lo,b->hi,b->lo,-c->hi,-c->lo);
173 	#else
174 		LIS_QUAD_ADD_SSE2(a->hi,a->lo,b->hi,b->lo,-c->hi,-c->lo);
175 	#endif
176 }
177 
178 
179 #undef __FUNC__
180 #define __FUNC__ "lis_quad_mul"
lis_quad_mul(LIS_QUAD * a,const LIS_QUAD * b,const LIS_QUAD * c)181 void lis_quad_mul(LIS_QUAD *a, const LIS_QUAD *b, const LIS_QUAD *c)
182 {
183 	LIS_QUAD_DECLAR;
184 
185 	#ifndef USE_SSE2
186 		LIS_QUAD_MUL(a->hi,a->lo,b->hi,b->lo,c->hi,c->lo);
187 	#else
188 		LIS_QUAD_MUL_SSE2(a->hi,a->lo,b->hi,b->lo,c->hi,c->lo);
189 	#endif
190 }
191 
192 #undef __FUNC__
193 #define __FUNC__ "lis_quad_mul_dd_d"
lis_quad_mul_dd_d(LIS_QUAD * a,const LIS_QUAD * b,const double c)194  void lis_quad_mul_dd_d(LIS_QUAD *a, const LIS_QUAD *b, const double c)
195 {
196 	LIS_QUAD_DECLAR;
197 
198 	#ifndef USE_SSE2
199 		LIS_QUAD_MULD(a->hi,a->lo,b->hi,b->lo,c);
200 	#else
201 		LIS_QUAD_MULD_SSE2(a->hi,a->lo,b->hi,b->lo,c);
202 	#endif
203 }
204 
205 #undef __FUNC__
206 #define __FUNC__ "lis_quad_sqr"
lis_quad_sqr(LIS_QUAD * a,const LIS_QUAD * b)207 void lis_quad_sqr(LIS_QUAD *a, const LIS_QUAD *b)
208 {
209 	LIS_QUAD_DECLAR;
210 
211 	#ifndef USE_SSE2
212 		LIS_QUAD_SQR(a->hi,a->lo,b->hi,b->lo);
213 	#else
214 		LIS_QUAD_SQR_SSE2(a->hi,a->lo,b->hi,b->lo);
215 	#endif
216 }
217 
218 #undef __FUNC__
219 #define __FUNC__ "lis_quad_div"
lis_quad_div(LIS_QUAD * a,const LIS_QUAD * b,const LIS_QUAD * c)220 void lis_quad_div(LIS_QUAD *a, const LIS_QUAD *b, const LIS_QUAD *c)
221 {
222 	LIS_QUAD_DECLAR;
223 
224 	#ifndef USE_SSE2
225 		LIS_QUAD_DIV(a->hi,a->lo,b->hi,b->lo,c->hi,c->lo);
226 	#else
227 		LIS_QUAD_DIV_SSE2(a->hi,a->lo,b->hi,b->lo,c->hi,c->lo);
228 	#endif
229 }
230 
231 #undef __FUNC__
232 #define __FUNC__ "lis_quad_sqrt"
lis_quad_sqrt(LIS_QUAD * a,const LIS_QUAD * b)233 LIS_INT lis_quad_sqrt(LIS_QUAD *a, const LIS_QUAD *b)
234 {
235 	LIS_QUAD_DECLAR;
236 
237 	#ifndef USE_SSE2
238 		LIS_QUAD_SQRT(a->hi,a->lo,b->hi,b->lo);
239 	#else
240 		LIS_QUAD_SQRT_SSE2(a->hi,a->lo,b->hi,b->lo);
241 	#endif
242 	return LIS_SUCCESS;
243 }
244 #endif
245 
246