1 // GENERATED CODE --- DO NOT EDIT ---
2 // Code is produced by sasmodels.gen from sasmodels/models/MODEL.c
3
4 #ifdef __OPENCL_VERSION__
5 # define USE_OPENCL
6 #endif
7
8 #define USE_KAHAN_SUMMATION 0
9
10 // If opencl is not available, then we are compiling a C function
11 // Note: if using a C++ compiler, then define kernel as extern "C"
12 #ifndef USE_OPENCL
13 # ifdef __cplusplus
14 #include <cstdio>
15 #include <cmath>
16 using namespace std;
17 #if defined(_MSC_VER)
18 # define kernel extern "C" __declspec( dllexport )
trunc(float x)19 inline float trunc(float x) { return x>=0?floor(x):-floor(-x); }
fmin(float x,float y)20 inline float fmin(float x, float y) { return x>y ? y : x; }
fmax(float x,float y)21 inline float fmax(float x, float y) { return x<y ? y : x; }
22 #else
23 # define kernel extern "C"
24 #endif
SINCOS(float angle,float & svar,float & cvar)25 inline void SINCOS(float angle, float &svar, float &cvar) { svar=sin(angle); cvar=cos(angle); }
26 # else
27 #include <stdio.h>
28 #include <tgmath.h> // C99 type-generic math, so sin(float) => sinf
29 // MSVC doesn't support C99, so no need for dllexport on C99 branch
30 #define kernel
31 #define SINCOS(angle,svar,cvar) do {const float _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0)
32 # endif
33 # define global
34 # define local
35 # define constant const
36 // OpenCL powr(a,b) = C99 pow(a,b), b >= 0
37 // OpenCL pown(a,b) = C99 pow(a,b), b integer
38 # define powr(a,b) pow(a,b)
39 # define pown(a,b) pow(a,b)
40 #else
41 # ifdef USE_SINCOS
42 # define SINCOS(angle,svar,cvar) svar=sincos(angle,&cvar)
43 # else
44 # define SINCOS(angle,svar,cvar) do {const float _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0)
45 # endif
46 #endif
47
48 // Standard mathematical constants:
49 // M_E, M_LOG2E, M_LOG10E, M_LN2, M_LN10, M_PI, M_PI_2=pi/2, M_PI_4=pi/4,
50 // M_1_PI=1/pi, M_2_PI=2/pi, M_2_SQRTPI=2/sqrt(pi), SQRT2, SQRT1_2=sqrt(1/2)
51 // OpenCL defines M_constant_F for float constants, and nothing if float
52 // is not enabled on the card, which is why these constants may be missing
53 #ifndef M_PI
54 # define M_PI 3.141592653589793f
55 #endif
56 #ifndef M_PI_2
57 # define M_PI_2 1.570796326794897f
58 #endif
59 #ifndef M_PI_4
60 # define M_PI_4 0.7853981633974483f
61 #endif
62
63 // Non-standard pi/180, used for converting between degrees and radians
64 #ifndef M_PI_180
65 # define M_PI_180 0.017453292519943295f
66 #endif
67
68
69 #define IQ_KERNEL_NAME mass_surface_fractal_Iq
70 #define IQ_PARAMETERS mass_dim, surface_dim, cluster_rg, primary_rg
71 #define IQ_FIXED_PARAMETER_DECLARATIONS const float scale, \
72 const float background, \
73 const float mass_dim, \
74 const float surface_dim, \
75 const float cluster_rg, \
76 const float primary_rg
77 #define IQXY_KERNEL_NAME mass_surface_fractal_Iqxy
78 #define IQXY_PARAMETERS mass_dim, surface_dim, cluster_rg, primary_rg
79 #define IQXY_FIXED_PARAMETER_DECLARATIONS const float scale, \
80 const float background, \
81 const float mass_dim, \
82 const float surface_dim, \
83 const float cluster_rg, \
84 const float primary_rg
85
86 float form_volume(float radius);
87
88 float Iq(float q,
89 float mass_dim,
90 float surface_dim,
91 float cluster_rg,
92 float primary_rg);
93
94 float Iqxy(float qx, float qy,
95 float mass_dim,
96 float surface_dim,
97 float cluster_rg,
98 float primary_rg);
99
100
_mass_surface_fractal_kernel(float q,float mass_dim,float surface_dim,float cluster_rg,float primary_rg)101 static float _mass_surface_fractal_kernel(float q,
102 float mass_dim,
103 float surface_dim,
104 float cluster_rg,
105 float primary_rg)
106 {
107 //computation
108 float tot_dim = 6.0f - surface_dim - mass_dim;
109 mass_dim /= 2.0f;
110 tot_dim /= 2.0f;
111
112 float rc_norm = cluster_rg * cluster_rg / (3.0f * mass_dim);
113 float rp_norm = primary_rg * primary_rg / (3.0f * tot_dim);
114
115 //x for P
116 float x_val1 = 1.0f + q * q * rc_norm;
117 float x_val2 = 1.0f + q * q * rp_norm;
118
119 float inv_form = pow(x_val1, mass_dim) * pow(x_val2, tot_dim);
120
121 //another singular
122 if (inv_form == 0.0f) return 0.0f;
123
124 float form_factor = 1.0f;
125 form_factor /= inv_form;
126
127 return (form_factor);
128 }
form_volume(float radius)129 float form_volume(float radius){
130
131 return 1.333333333333333f*M_PI*radius*radius*radius;
132 }
133
Iq(float q,float mass_dim,float surface_dim,float cluster_rg,float primary_rg)134 float Iq(float q,
135 float mass_dim,
136 float surface_dim,
137 float cluster_rg,
138 float primary_rg)
139 {
140 return _mass_surface_fractal_kernel(q,
141 mass_dim,
142 surface_dim,
143 cluster_rg,
144 primary_rg);
145 }
146
147 // Iqxy is never called since no orientation or magnetic parameters.
Iqxy(float qx,float qy,float mass_dim,float surface_dim,float cluster_rg,float primary_rg)148 float Iqxy(float qx, float qy,
149 float mass_dim,
150 float surface_dim,
151 float cluster_rg,
152 float primary_rg)
153 {
154 float q = sqrt(qx*qx + qy*qy);
155 return _mass_surface_fractal_kernel(q,
156 mass_dim,
157 surface_dim,
158 cluster_rg,
159 primary_rg);
160 }
161
162
163
164 /*
165 ##########################################################
166 # #
167 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! #
168 # !! !! #
169 # !! KEEP THIS CODE CONSISTENT WITH KERNELPY.PY !! #
170 # !! !! #
171 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! #
172 # #
173 ##########################################################
174 */
175
176 #ifdef IQ_KERNEL_NAME
IQ_KERNEL_NAME(global const float * q,global float * result,const int Nq,global float * loops_g,local float * loops,const float cutoff,IQ_DISPERSION_LENGTH_DECLARATIONS,IQ_FIXED_PARAMETER_DECLARATIONS)177 kernel void IQ_KERNEL_NAME(
178 global const float *q,
179 global float *result,
180 const int Nq,
181 #ifdef IQ_OPEN_LOOPS
182 #ifdef USE_OPENCL
183 global float *loops_g,
184 #endif
185 local float *loops,
186 const float cutoff,
187 IQ_DISPERSION_LENGTH_DECLARATIONS,
188 #endif
189 IQ_FIXED_PARAMETER_DECLARATIONS
190 )
191 {
192 #ifdef USE_OPENCL
193 #ifdef IQ_OPEN_LOOPS
194 // copy loops info to local memory
195 event_t e = async_work_group_copy(loops, loops_g, (IQ_DISPERSION_LENGTH_SUM)*2, 0);
196 wait_group_events(1, &e);
197 #endif
198
199 int i = get_global_id(0);
200 if (i < Nq)
201 #else
202 #pragma omp parallel for
203 for (int i=0; i < Nq; i++)
204 #endif
205 {
206 const float qi = q[i];
207 #ifdef IQ_OPEN_LOOPS
208 float ret=0.0f, norm=0.0f;
209 #ifdef VOLUME_PARAMETERS
210 float vol=0.0f, norm_vol=0.0f;
211 #endif
212 IQ_OPEN_LOOPS
213 //for (int radius_i=0; radius_i < Nradius; radius_i++) {
214 // const float radius = loops[2*(radius_i)];
215 // const float radius_w = loops[2*(radius_i)+1];
216
217 const float weight = IQ_WEIGHT_PRODUCT;
218 if (weight > cutoff) {
219 const float scattering = Iq(qi, IQ_PARAMETERS);
220 // allow kernels to exclude invalid regions by returning NaN
221 if (!isnan(scattering)) {
222 ret += weight*scattering;
223 norm += weight;
224 #ifdef VOLUME_PARAMETERS
225 const float vol_weight = VOLUME_WEIGHT_PRODUCT;
226 vol += vol_weight*form_volume(VOLUME_PARAMETERS);
227 norm_vol += vol_weight;
228 #endif
229 }
230 //else { printf("exclude qx,qy,I:%g,%g,%g\n",qi,scattering); }
231 }
232 IQ_CLOSE_LOOPS
233 #ifdef VOLUME_PARAMETERS
234 if (vol*norm_vol != 0.0f) {
235 ret *= norm_vol/vol;
236 }
237 #endif
238 result[i] = scale*ret/norm+background;
239 #else
240 result[i] = scale*Iq(qi, IQ_PARAMETERS) + background;
241 #endif
242 }
243 }
244 #endif
245
246
247 #ifdef IQXY_KERNEL_NAME
IQXY_KERNEL_NAME(global const float * qx,global const float * qy,global float * result,const int Nq,global float * loops_g,local float * loops,const float cutoff,IQXY_DISPERSION_LENGTH_DECLARATIONS,IQXY_FIXED_PARAMETER_DECLARATIONS)248 kernel void IQXY_KERNEL_NAME(
249 global const float *qx,
250 global const float *qy,
251 global float *result,
252 const int Nq,
253 #ifdef IQXY_OPEN_LOOPS
254 #ifdef USE_OPENCL
255 global float *loops_g,
256 #endif
257 local float *loops,
258 const float cutoff,
259 IQXY_DISPERSION_LENGTH_DECLARATIONS,
260 #endif
261 IQXY_FIXED_PARAMETER_DECLARATIONS
262 )
263 {
264 #ifdef USE_OPENCL
265 #ifdef IQXY_OPEN_LOOPS
266 // copy loops info to local memory
267 event_t e = async_work_group_copy(loops, loops_g, (IQXY_DISPERSION_LENGTH_SUM)*2, 0);
268 wait_group_events(1, &e);
269 #endif
270
271 int i = get_global_id(0);
272 if (i < Nq)
273 #else
274 #pragma omp parallel for
275 for (int i=0; i < Nq; i++)
276 #endif
277 {
278 const float qxi = qx[i];
279 const float qyi = qy[i];
280 #if USE_KAHAN_SUMMATION
281 float accumulated_error = 0.0f;
282 #endif
283 #ifdef IQXY_OPEN_LOOPS
284 float ret=0.0f, norm=0.0f;
285 #ifdef VOLUME_PARAMETERS
286 float vol=0.0f, norm_vol=0.0f;
287 #endif
288 IQXY_OPEN_LOOPS
289 //for (int radius_i=0; radius_i < Nradius; radius_i++) {
290 // const float radius = loops[2*(radius_i)];
291 // const float radius_w = loops[2*(radius_i)+1];
292
293 const float weight = IQXY_WEIGHT_PRODUCT;
294 if (weight > cutoff) {
295
296 const float scattering = Iqxy(qxi, qyi, IQXY_PARAMETERS);
297 if (!isnan(scattering)) { // if scattering is bad, exclude it from sum
298 //if (scattering >= 0.0f) { // scattering cannot be negative
299 // TODO: use correct angle for spherical correction
300 // Definition of theta and phi are probably reversed relative to the
301 // equation which gave rise to this correction, leading to an
302 // attenuation of the pattern as theta moves through pi/2.f Either
303 // reverse the meanings of phi and theta in the forms, or use phi
304 // rather than theta in this correction. Current code uses cos(theta)
305 // so that values match those of sasview.
306 #if defined(IQXY_HAS_THETA) // && 0
307 const float spherical_correction
308 = (Ntheta>1 ? fabs(cos(M_PI_180*theta))*M_PI_2:1.0f);
309 const float next = spherical_correction * weight * scattering;
310 #else
311 const float next = weight * scattering;
312 #endif
313 #if USE_KAHAN_SUMMATION
314 const float y = next - accumulated_error;
315 const float t = ret + y;
316 accumulated_error = (t - ret) - y;
317 ret = t;
318 #else
319 ret += next;
320 #endif
321 norm += weight;
322 #ifdef VOLUME_PARAMETERS
323 const float vol_weight = VOLUME_WEIGHT_PRODUCT;
324 vol += vol_weight*form_volume(VOLUME_PARAMETERS);
325 #endif
326 norm_vol += vol_weight;
327 }
328 //else { printf("exclude qx,qy,I:%g,%g,%g\n",qi,scattering); }
329 }
330 IQXY_CLOSE_LOOPS
331 #ifdef VOLUME_PARAMETERS
332 if (vol*norm_vol != 0.0f) {
333 ret *= norm_vol/vol;
334 }
335 #endif
336 result[i] = scale*ret/norm+background;
337 #else
338 result[i] = scale*Iqxy(qxi, qyi, IQXY_PARAMETERS) + background;
339 #endif
340 }
341 }
342 #endif
343