1 /* Copyright (C) 2005 The Scalable Software Infrastructure Project. All rights reserved.
2
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions are met:
5 1. Redistributions of source code must retain the above copyright
6 notice, this list of conditions and the following disclaimer.
7 2. Redistributions in binary form must reproduce the above copyright
8 notice, this list of conditions and the following disclaimer in the
9 documentation and/or other materials provided with the distribution.
10 3. Neither the name of the project nor the names of its contributors
11 may be used to endorse or promote products derived from this software
12 without specific prior written permission.
13
14 THIS SOFTWARE IS PROVIDED BY THE SCALABLE SOFTWARE INFRASTRUCTURE PROJECT
15 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
16 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE SCALABLE SOFTWARE INFRASTRUCTURE
18 PROJECT BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
19 OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #ifdef HAVE_CONFIG_H
28 #include "lis_config.h"
29 #else
30 #ifdef HAVE_CONFIG_WIN_H
31 #include "lis_config_win.h"
32 #endif
33 #endif
34
35 #include <stdio.h>
36 #include <string.h>
37 #include <math.h>
38 #ifdef USE_QUAD_PRECISION
39 #ifdef _WIN32
40 #include <float.h>
41 #endif
42 #endif
43 #ifdef USE_SSE2
44 #include <emmintrin.h>
45 #endif
46 #ifdef _OPENMP
47 #include <omp.h>
48 #endif
49 #ifdef USE_MPI
50 #include <mpi.h>
51 #endif
52 #include "lislib.h"
53
54 #ifdef USE_QUAD_PRECISION
55 #undef __FUNC__
56 #define __FUNC__ "lis_quad_x87_fpu_init"
lis_quad_x87_fpu_init(LIS_UNSIGNED_INT * cw_old)57 void lis_quad_x87_fpu_init(LIS_UNSIGNED_INT *cw_old)
58 {
59 #ifdef HAS_X87_FPU
60 #ifdef _WIN32
61 #ifndef _WIN64
62 LIS_UNSIGNED_INT cw = _control87(0, 0);
63 _control87(0x00010000, 0x00030000);
64 *cw_old = cw;
65 cw = _control87(0, 0);
66 #endif
67 #else
68 LIS_INT cw,cw_new;
69 asm volatile ("fnstcw %0":"=m" (cw));
70 cw_new = (cw & ~0x0300) | 0x0200;
71 asm volatile ("fldcw %0": :"m" (cw_new));
72 *cw_old = cw;
73 #endif
74 #else
75 *cw_old = 0;
76 #endif
77 }
78
79 #undef __FUNC__
80 #define __FUNC__ "lis_quad_x87_fpu_finalize"
lis_quad_x87_fpu_finalize(LIS_UNSIGNED_INT cw)81 void lis_quad_x87_fpu_finalize(LIS_UNSIGNED_INT cw)
82 {
83 #ifdef HAS_X87_FPU
84 #ifdef _WIN32
85 #ifndef _WIN64
86 _control87(cw, 0xFFFFFFFF);
87 #endif
88 #else
89 asm volatile ("fldcw %0": :"m" (cw));
90 #endif
91 #endif
92 }
93
94 #undef __FUNC__
95 #define __FUNC__ "lis_quad_minus"
lis_quad_minus(LIS_QUAD * a)96 void lis_quad_minus(LIS_QUAD *a)
97 {
98 a->hi = -a->hi;
99 a->lo = -a->lo;
100 }
101
102 #undef __FUNC__
103 #define __FUNC__ "lis_quad_zero"
lis_quad_zero(LIS_QUAD * a)104 void lis_quad_zero(LIS_QUAD *a)
105 {
106 a->hi = 0.0;
107 a->lo = 0.0;
108 }
109
110 #undef __FUNC__
111 #define __FUNC__ "lis_quad_one"
lis_quad_one(LIS_QUAD * a)112 void lis_quad_one(LIS_QUAD *a)
113 {
114 a->hi = 1.0;
115 a->lo = 0.0;
116 }
117
118 #undef __FUNC__
119 #define __FUNC__ "lis_quad_max"
lis_quad_max(LIS_QUAD * a,LIS_QUAD * b,LIS_QUAD * c)120 void lis_quad_max(LIS_QUAD *a, LIS_QUAD *b, LIS_QUAD *c)
121 {
122 if( (a->hi > b->hi) || ((a->hi==b->hi) && (a->lo > b->lo)) )
123 {
124 c->hi = a->hi;
125 c->lo = a->lo;
126 }
127 else
128 {
129 c->hi = b->hi;
130 c->lo = b->lo;
131 }
132 }
133
134 #undef __FUNC__
135 #define __FUNC__ "lis_quad_min"
lis_quad_min(LIS_QUAD * a,LIS_QUAD * b,LIS_QUAD * c)136 void lis_quad_min(LIS_QUAD *a, LIS_QUAD *b, LIS_QUAD *c)
137 {
138 if( (a->hi < b->hi) || ((a->hi==b->hi) && (a->lo < b->lo)) )
139 {
140 c->hi = a->hi;
141 c->lo = a->lo;
142 }
143 else
144 {
145 c->hi = b->hi;
146 c->lo = b->lo;
147 }
148 }
149
150
151
152 #undef __FUNC__
153 #define __FUNC__ "lis_quad_add"
lis_quad_add(LIS_QUAD * a,const LIS_QUAD * b,const LIS_QUAD * c)154 void lis_quad_add(LIS_QUAD *a, const LIS_QUAD *b, const LIS_QUAD *c)
155 {
156 LIS_QUAD_DECLAR;
157
158 #ifndef USE_SSE2
159 LIS_QUAD_ADD(a->hi,a->lo,b->hi,b->lo,c->hi,c->lo);
160 #else
161 LIS_QUAD_ADD_SSE2(a->hi,a->lo,b->hi,b->lo,c->hi,c->lo);
162 #endif
163 }
164
165 #undef __FUNC__
166 #define __FUNC__ "lis_quad_sub"
lis_quad_sub(LIS_QUAD * a,const LIS_QUAD * b,const LIS_QUAD * c)167 void lis_quad_sub(LIS_QUAD *a, const LIS_QUAD *b, const LIS_QUAD *c)
168 {
169 LIS_QUAD_DECLAR;
170
171 #ifndef USE_SSE2
172 LIS_QUAD_ADD(a->hi,a->lo,b->hi,b->lo,-c->hi,-c->lo);
173 #else
174 LIS_QUAD_ADD_SSE2(a->hi,a->lo,b->hi,b->lo,-c->hi,-c->lo);
175 #endif
176 }
177
178
179 #undef __FUNC__
180 #define __FUNC__ "lis_quad_mul"
lis_quad_mul(LIS_QUAD * a,const LIS_QUAD * b,const LIS_QUAD * c)181 void lis_quad_mul(LIS_QUAD *a, const LIS_QUAD *b, const LIS_QUAD *c)
182 {
183 LIS_QUAD_DECLAR;
184
185 #ifndef USE_SSE2
186 LIS_QUAD_MUL(a->hi,a->lo,b->hi,b->lo,c->hi,c->lo);
187 #else
188 LIS_QUAD_MUL_SSE2(a->hi,a->lo,b->hi,b->lo,c->hi,c->lo);
189 #endif
190 }
191
192 #undef __FUNC__
193 #define __FUNC__ "lis_quad_mul_dd_d"
lis_quad_mul_dd_d(LIS_QUAD * a,const LIS_QUAD * b,const double c)194 void lis_quad_mul_dd_d(LIS_QUAD *a, const LIS_QUAD *b, const double c)
195 {
196 LIS_QUAD_DECLAR;
197
198 #ifndef USE_SSE2
199 LIS_QUAD_MULD(a->hi,a->lo,b->hi,b->lo,c);
200 #else
201 LIS_QUAD_MULD_SSE2(a->hi,a->lo,b->hi,b->lo,c);
202 #endif
203 }
204
205 #undef __FUNC__
206 #define __FUNC__ "lis_quad_sqr"
lis_quad_sqr(LIS_QUAD * a,const LIS_QUAD * b)207 void lis_quad_sqr(LIS_QUAD *a, const LIS_QUAD *b)
208 {
209 LIS_QUAD_DECLAR;
210
211 #ifndef USE_SSE2
212 LIS_QUAD_SQR(a->hi,a->lo,b->hi,b->lo);
213 #else
214 LIS_QUAD_SQR_SSE2(a->hi,a->lo,b->hi,b->lo);
215 #endif
216 }
217
218 #undef __FUNC__
219 #define __FUNC__ "lis_quad_div"
lis_quad_div(LIS_QUAD * a,const LIS_QUAD * b,const LIS_QUAD * c)220 void lis_quad_div(LIS_QUAD *a, const LIS_QUAD *b, const LIS_QUAD *c)
221 {
222 LIS_QUAD_DECLAR;
223
224 #ifndef USE_SSE2
225 LIS_QUAD_DIV(a->hi,a->lo,b->hi,b->lo,c->hi,c->lo);
226 #else
227 LIS_QUAD_DIV_SSE2(a->hi,a->lo,b->hi,b->lo,c->hi,c->lo);
228 #endif
229 }
230
231 #undef __FUNC__
232 #define __FUNC__ "lis_quad_sqrt"
lis_quad_sqrt(LIS_QUAD * a,const LIS_QUAD * b)233 LIS_INT lis_quad_sqrt(LIS_QUAD *a, const LIS_QUAD *b)
234 {
235 LIS_QUAD_DECLAR;
236
237 #ifndef USE_SSE2
238 LIS_QUAD_SQRT(a->hi,a->lo,b->hi,b->lo);
239 #else
240 LIS_QUAD_SQRT_SSE2(a->hi,a->lo,b->hi,b->lo);
241 #endif
242 return LIS_SUCCESS;
243 }
244 #endif
245
246