1 // GENERATED CODE --- DO NOT EDIT ---
2 // Code is produced by sasmodels.gen from sasmodels/models/MODEL.c
3
4 #ifdef __OPENCL_VERSION__
5 # define USE_OPENCL
6 #endif
7
8 #define USE_KAHAN_SUMMATION 0
9
10 // If opencl is not available, then we are compiling a C function
11 // Note: if using a C++ compiler, then define kernel as extern "C"
12 #ifndef USE_OPENCL
13 # ifdef __cplusplus
14 #include <cstdio>
15 #include <cmath>
16 using namespace std;
17 #if defined(_MSC_VER)
18 # define kernel extern "C" __declspec( dllexport )
trunc(float x)19 inline float trunc(float x) { return x>=0?floor(x):-floor(-x); }
fmin(float x,float y)20 inline float fmin(float x, float y) { return x>y ? y : x; }
fmax(float x,float y)21 inline float fmax(float x, float y) { return x<y ? y : x; }
22 #else
23 # define kernel extern "C"
24 #endif
SINCOS(float angle,float & svar,float & cvar)25 inline void SINCOS(float angle, float &svar, float &cvar) { svar=sin(angle); cvar=cos(angle); }
26 # else
27 #include <stdio.h>
28 #include <tgmath.h> // C99 type-generic math, so sin(float) => sinf
29 // MSVC doesn't support C99, so no need for dllexport on C99 branch
30 #define kernel
31 #define SINCOS(angle,svar,cvar) do {const float _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0)
32 # endif
33 # define global
34 # define local
35 # define constant const
36 // OpenCL powr(a,b) = C99 pow(a,b), b >= 0
37 // OpenCL pown(a,b) = C99 pow(a,b), b integer
38 # define powr(a,b) pow(a,b)
39 # define pown(a,b) pow(a,b)
40 #else
41 # ifdef USE_SINCOS
42 # define SINCOS(angle,svar,cvar) svar=sincos(angle,&cvar)
43 # else
44 # define SINCOS(angle,svar,cvar) do {const float _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0)
45 # endif
46 #endif
47
48 // Standard mathematical constants:
49 // M_E, M_LOG2E, M_LOG10E, M_LN2, M_LN10, M_PI, M_PI_2=pi/2, M_PI_4=pi/4,
50 // M_1_PI=1/pi, M_2_PI=2/pi, M_2_SQRTPI=2/sqrt(pi), SQRT2, SQRT1_2=sqrt(1/2)
51 // OpenCL defines M_constant_F for float constants, and nothing if float
52 // is not enabled on the card, which is why these constants may be missing
53 #ifndef M_PI
54 # define M_PI 3.141592653589793f
55 #endif
56 #ifndef M_PI_2
57 # define M_PI_2 1.570796326794897f
58 #endif
59 #ifndef M_PI_4
60 # define M_PI_4 0.7853981633974483f
61 #endif
62
63 // Non-standard pi/180, used for converting between degrees and radians
64 #ifndef M_PI_180
65 # define M_PI_180 0.017453292519943295f
66 #endif
67
68
69 #define VOLUME_PARAMETERS effect_radius
70 #define VOLUME_WEIGHT_PRODUCT effect_radius_w
71 #define VOLUME_PARAMETER_DECLARATIONS float effect_radius
72 #define IQ_KERNEL_NAME hardsphere_Iq
73 #define IQ_PARAMETERS effect_radius, volfraction
74 #define IQ_FIXED_PARAMETER_DECLARATIONS const float scale, \
75 const float background, \
76 const float volfraction
77 #define IQ_WEIGHT_PRODUCT effect_radius_w
78 #define IQ_DISPERSION_LENGTH_DECLARATIONS const int Neffect_radius
79 #define IQ_DISPERSION_LENGTH_SUM Neffect_radius
80 #define IQ_OPEN_LOOPS for (int effect_radius_i=0; effect_radius_i < Neffect_radius; effect_radius_i++) { \
81 const float effect_radius = loops[2*(effect_radius_i)]; \
82 const float effect_radius_w = loops[2*(effect_radius_i)+1];
83 #define IQ_CLOSE_LOOPS }
84 #define IQ_PARAMETER_DECLARATIONS float effect_radius, float volfraction
85 #define IQXY_KERNEL_NAME hardsphere_Iqxy
86 #define IQXY_PARAMETERS effect_radius, volfraction
87 #define IQXY_FIXED_PARAMETER_DECLARATIONS const float scale, \
88 const float background, \
89 const float volfraction
90 #define IQXY_WEIGHT_PRODUCT effect_radius_w
91 #define IQXY_DISPERSION_LENGTH_DECLARATIONS const int Neffect_radius
92 #define IQXY_DISPERSION_LENGTH_SUM Neffect_radius
93 #define IQXY_OPEN_LOOPS for (int effect_radius_i=0; effect_radius_i < Neffect_radius; effect_radius_i++) { \
94 const float effect_radius = loops[2*(effect_radius_i)]; \
95 const float effect_radius_w = loops[2*(effect_radius_i)+1];
96 #define IQXY_CLOSE_LOOPS }
97 #define IQXY_PARAMETER_DECLARATIONS float effect_radius, float volfraction
98
99 float form_volume(VOLUME_PARAMETER_DECLARATIONS);
form_volume(VOLUME_PARAMETER_DECLARATIONS)100 float form_volume(VOLUME_PARAMETER_DECLARATIONS) {
101
102 return 1.0f;
103
104 }
105
106
107 float Iq(float q, IQ_PARAMETER_DECLARATIONS);
Iq(float q,IQ_PARAMETER_DECLARATIONS)108 float Iq(float q, IQ_PARAMETER_DECLARATIONS) {
109
110 float denom,dnum,alpha,beta,gamm,a,asq,ath,afor,rca,rsa;
111 float calp,cbeta,cgam,prefac,c,vstruc;
112 float struc;
113
114 // compute constants
115 denom = pow((1.0f-volfraction),4);
116 dnum = pow((1.0f + 2.0f*volfraction),2);
117 alpha = dnum/denom;
118 beta = -6.0f*volfraction*pow((1.0f + volfraction/2.0f),2)/denom;
119 gamm = 0.50f*volfraction*dnum/denom;
120 //
121 // calculate the structure factor
122 //
123 a = 2.0f*q*effect_radius;
124 asq = a*a;
125 ath = asq*a;
126 afor = ath*a;
127 SINCOS(a,rsa,rca);
128 //rca = cos(a);
129 //rsa = sin(a);
130 calp = alpha*(rsa/asq - rca/a);
131 cbeta = beta*(2.0f*rsa/asq - (asq - 2.0f)*rca/ath - 2.0f/ath);
132 cgam = gamm*(-rca/a + (4.0f/a)*((3.0f*asq - 6.0f)*rca/afor + (asq - 6.0f)*rsa/ath + 6.0f/afor));
133 prefac = -24.0f*volfraction/a;
134 c = prefac*(calp + cbeta + cgam);
135 vstruc = 1.0f/(1.0f-c);
136 struc = vstruc;
137
138 return(struc);
139
140 }
141
142
143 float Iqxy(float qx, float qy, IQXY_PARAMETER_DECLARATIONS);
Iqxy(float qx,float qy,IQXY_PARAMETER_DECLARATIONS)144 float Iqxy(float qx, float qy, IQXY_PARAMETER_DECLARATIONS) {
145
146 // never called since no orientation or magnetic parameters.
147 return Iq(sqrt(qx*qx+qy*qy), IQ_PARAMETERS);
148
149 }
150
151
152 /*
153 ##########################################################
154 # #
155 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! #
156 # !! !! #
157 # !! KEEP THIS CODE CONSISTENT WITH KERNELPY.PY !! #
158 # !! !! #
159 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! #
160 # #
161 ##########################################################
162 */
163
164 #ifdef IQ_KERNEL_NAME
IQ_KERNEL_NAME(global const float * q,global float * result,const int Nq,global float * loops_g,local float * loops,const float cutoff,IQ_DISPERSION_LENGTH_DECLARATIONS,IQ_FIXED_PARAMETER_DECLARATIONS)165 kernel void IQ_KERNEL_NAME(
166 global const float *q,
167 global float *result,
168 const int Nq,
169 #ifdef IQ_OPEN_LOOPS
170 #ifdef USE_OPENCL
171 global float *loops_g,
172 #endif
173 local float *loops,
174 const float cutoff,
175 IQ_DISPERSION_LENGTH_DECLARATIONS,
176 #endif
177 IQ_FIXED_PARAMETER_DECLARATIONS
178 )
179 {
180 #ifdef USE_OPENCL
181 #ifdef IQ_OPEN_LOOPS
182 // copy loops info to local memory
183 event_t e = async_work_group_copy(loops, loops_g, (IQ_DISPERSION_LENGTH_SUM)*2, 0);
184 wait_group_events(1, &e);
185 #endif
186
187 int i = get_global_id(0);
188 if (i < Nq)
189 #else
190 #pragma omp parallel for
191 for (int i=0; i < Nq; i++)
192 #endif
193 {
194 const float qi = q[i];
195 #ifdef IQ_OPEN_LOOPS
196 float ret=0.0f, norm=0.0f;
197 #ifdef VOLUME_PARAMETERS
198 float vol=0.0f, norm_vol=0.0f;
199 #endif
200 IQ_OPEN_LOOPS
201 //for (int radius_i=0; radius_i < Nradius; radius_i++) {
202 // const float radius = loops[2*(radius_i)];
203 // const float radius_w = loops[2*(radius_i)+1];
204
205 const float weight = IQ_WEIGHT_PRODUCT;
206 if (weight > cutoff) {
207 const float scattering = Iq(qi, IQ_PARAMETERS);
208 // allow kernels to exclude invalid regions by returning NaN
209 if (!isnan(scattering)) {
210 ret += weight*scattering;
211 norm += weight;
212 #ifdef VOLUME_PARAMETERS
213 const float vol_weight = VOLUME_WEIGHT_PRODUCT;
214 vol += vol_weight*form_volume(VOLUME_PARAMETERS);
215 norm_vol += vol_weight;
216 #endif
217 }
218 //else { printf("exclude qx,qy,I:%g,%g,%g\n",qi,scattering); }
219 }
220 IQ_CLOSE_LOOPS
221 #ifdef VOLUME_PARAMETERS
222 if (vol*norm_vol != 0.0f) {
223 ret *= norm_vol/vol;
224 }
225 #endif
226 result[i] = scale*ret/norm+background;
227 #else
228 result[i] = scale*Iq(qi, IQ_PARAMETERS) + background;
229 #endif
230 }
231 }
232 #endif
233
234
235 #ifdef IQXY_KERNEL_NAME
IQXY_KERNEL_NAME(global const float * qx,global const float * qy,global float * result,const int Nq,global float * loops_g,local float * loops,const float cutoff,IQXY_DISPERSION_LENGTH_DECLARATIONS,IQXY_FIXED_PARAMETER_DECLARATIONS)236 kernel void IQXY_KERNEL_NAME(
237 global const float *qx,
238 global const float *qy,
239 global float *result,
240 const int Nq,
241 #ifdef IQXY_OPEN_LOOPS
242 #ifdef USE_OPENCL
243 global float *loops_g,
244 #endif
245 local float *loops,
246 const float cutoff,
247 IQXY_DISPERSION_LENGTH_DECLARATIONS,
248 #endif
249 IQXY_FIXED_PARAMETER_DECLARATIONS
250 )
251 {
252 #ifdef USE_OPENCL
253 #ifdef IQXY_OPEN_LOOPS
254 // copy loops info to local memory
255 event_t e = async_work_group_copy(loops, loops_g, (IQXY_DISPERSION_LENGTH_SUM)*2, 0);
256 wait_group_events(1, &e);
257 #endif
258
259 int i = get_global_id(0);
260 if (i < Nq)
261 #else
262 #pragma omp parallel for
263 for (int i=0; i < Nq; i++)
264 #endif
265 {
266 const float qxi = qx[i];
267 const float qyi = qy[i];
268 #if USE_KAHAN_SUMMATION
269 float accumulated_error = 0.0f;
270 #endif
271 #ifdef IQXY_OPEN_LOOPS
272 float ret=0.0f, norm=0.0f;
273 #ifdef VOLUME_PARAMETERS
274 float vol=0.0f, norm_vol=0.0f;
275 #endif
276 IQXY_OPEN_LOOPS
277 //for (int radius_i=0; radius_i < Nradius; radius_i++) {
278 // const float radius = loops[2*(radius_i)];
279 // const float radius_w = loops[2*(radius_i)+1];
280
281 const float weight = IQXY_WEIGHT_PRODUCT;
282 if (weight > cutoff) {
283
284 const float scattering = Iqxy(qxi, qyi, IQXY_PARAMETERS);
285 if (!isnan(scattering)) { // if scattering is bad, exclude it from sum
286 //if (scattering >= 0.0f) { // scattering cannot be negative
287 // TODO: use correct angle for spherical correction
288 // Definition of theta and phi are probably reversed relative to the
289 // equation which gave rise to this correction, leading to an
290 // attenuation of the pattern as theta moves through pi/2.f Either
291 // reverse the meanings of phi and theta in the forms, or use phi
292 // rather than theta in this correction. Current code uses cos(theta)
293 // so that values match those of sasview.
294 #if defined(IQXY_HAS_THETA) // && 0
295 const float spherical_correction
296 = (Ntheta>1 ? fabs(cos(M_PI_180*theta))*M_PI_2:1.0f);
297 const float next = spherical_correction * weight * scattering;
298 #else
299 const float next = weight * scattering;
300 #endif
301 #if USE_KAHAN_SUMMATION
302 const float y = next - accumulated_error;
303 const float t = ret + y;
304 accumulated_error = (t - ret) - y;
305 ret = t;
306 #else
307 ret += next;
308 #endif
309 norm += weight;
310 #ifdef VOLUME_PARAMETERS
311 const float vol_weight = VOLUME_WEIGHT_PRODUCT;
312 vol += vol_weight*form_volume(VOLUME_PARAMETERS);
313 #endif
314 norm_vol += vol_weight;
315 }
316 //else { printf("exclude qx,qy,I:%g,%g,%g\n",qi,scattering); }
317 }
318 IQXY_CLOSE_LOOPS
319 #ifdef VOLUME_PARAMETERS
320 if (vol*norm_vol != 0.0f) {
321 ret *= norm_vol/vol;
322 }
323 #endif
324 result[i] = scale*ret/norm+background;
325 #else
326 result[i] = scale*Iqxy(qxi, qyi, IQXY_PARAMETERS) + background;
327 #endif
328 }
329 }
330 #endif
331