1 // GENERATED CODE --- DO NOT EDIT ---
2 // Code is produced by sasmodels.gen from sasmodels/models/MODEL.c
3 
4 #ifdef __OPENCL_VERSION__
5 # define USE_OPENCL
6 #endif
7 
8 #define USE_KAHAN_SUMMATION 0
9 
10 // If opencl is not available, then we are compiling a C function
11 // Note: if using a C++ compiler, then define kernel as extern "C"
12 #ifndef USE_OPENCL
13 #  ifdef __cplusplus
14      #include <cstdio>
15      #include <cmath>
16      using namespace std;
17      #if defined(_MSC_VER)
18      #   define kernel extern "C" __declspec( dllexport )
trunc(float x)19          inline float trunc(float x) { return x>=0?floor(x):-floor(-x); }
fmin(float x,float y)20 	 inline float fmin(float x, float y) { return x>y ? y : x; }
fmax(float x,float y)21 	 inline float fmax(float x, float y) { return x<y ? y : x; }
22      #else
23      #   define kernel extern "C"
24      #endif
SINCOS(float angle,float & svar,float & cvar)25      inline void SINCOS(float angle, float &svar, float &cvar) { svar=sin(angle); cvar=cos(angle); }
26 #  else
27      #include <stdio.h>
28      #include <tgmath.h> // C99 type-generic math, so sin(float) => sinf
29      // MSVC doesn't support C99, so no need for dllexport on C99 branch
30      #define kernel
31      #define SINCOS(angle,svar,cvar) do {const float _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0)
32 #  endif
33 #  define global
34 #  define local
35 #  define constant const
36 // OpenCL powr(a,b) = C99 pow(a,b), b >= 0
37 // OpenCL pown(a,b) = C99 pow(a,b), b integer
38 #  define powr(a,b) pow(a,b)
39 #  define pown(a,b) pow(a,b)
40 #else
41 #  ifdef USE_SINCOS
42 #    define SINCOS(angle,svar,cvar) svar=sincos(angle,&cvar)
43 #  else
44 #    define SINCOS(angle,svar,cvar) do {const float _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0)
45 #  endif
46 #endif
47 
48 // Standard mathematical constants:
49 //   M_E, M_LOG2E, M_LOG10E, M_LN2, M_LN10, M_PI, M_PI_2=pi/2, M_PI_4=pi/4,
50 //   M_1_PI=1/pi, M_2_PI=2/pi, M_2_SQRTPI=2/sqrt(pi), SQRT2, SQRT1_2=sqrt(1/2)
51 // OpenCL defines M_constant_F for float constants, and nothing if float
52 // is not enabled on the card, which is why these constants may be missing
53 #ifndef M_PI
54 #  define M_PI 3.141592653589793f
55 #endif
56 #ifndef M_PI_2
57 #  define M_PI_2 1.570796326794897f
58 #endif
59 #ifndef M_PI_4
60 #  define M_PI_4 0.7853981633974483f
61 #endif
62 
63 // Non-standard pi/180, used for converting between degrees and radians
64 #ifndef M_PI_180
65 #  define M_PI_180 0.017453292519943295f
66 #endif
67 
68 
69 #define IQ_KERNEL_NAME mass_surface_fractal_Iq
70 #define IQ_PARAMETERS mass_dim, surface_dim, cluster_rg, primary_rg
71 #define IQ_FIXED_PARAMETER_DECLARATIONS const float scale, \
72     const float background, \
73     const float mass_dim, \
74     const float surface_dim, \
75     const float cluster_rg, \
76     const float primary_rg
77 #define IQXY_KERNEL_NAME mass_surface_fractal_Iqxy
78 #define IQXY_PARAMETERS mass_dim, surface_dim, cluster_rg, primary_rg
79 #define IQXY_FIXED_PARAMETER_DECLARATIONS const float scale, \
80     const float background, \
81     const float mass_dim, \
82     const float surface_dim, \
83     const float cluster_rg, \
84     const float primary_rg
85 
86 float form_volume(float radius);
87 
88 float Iq(float q,
89           float mass_dim,
90           float surface_dim,
91           float cluster_rg,
92           float primary_rg);
93 
94 float Iqxy(float qx, float qy,
95           float mass_dim,
96           float surface_dim,
97           float cluster_rg,
98           float primary_rg);
99 
100 
_mass_surface_fractal_kernel(float q,float mass_dim,float surface_dim,float cluster_rg,float primary_rg)101 static float _mass_surface_fractal_kernel(float q,
102           float mass_dim,
103           float surface_dim,
104           float cluster_rg,
105           float primary_rg)
106 {
107      //computation
108     float tot_dim = 6.0f - surface_dim - mass_dim;
109     mass_dim /= 2.0f;
110     tot_dim /= 2.0f;
111 
112     float rc_norm = cluster_rg * cluster_rg / (3.0f * mass_dim);
113     float rp_norm = primary_rg * primary_rg / (3.0f * tot_dim);
114 
115     //x for P
116     float x_val1 = 1.0f +  q * q * rc_norm;
117     float x_val2 = 1.0f +  q * q * rp_norm;
118 
119     float inv_form = pow(x_val1, mass_dim) * pow(x_val2, tot_dim);
120 
121     //another singular
122     if (inv_form == 0.0f) return 0.0f;
123 
124     float form_factor = 1.0f;
125     form_factor /= inv_form;
126 
127     return (form_factor);
128 }
form_volume(float radius)129 float form_volume(float radius){
130 
131     return 1.333333333333333f*M_PI*radius*radius*radius;
132 }
133 
Iq(float q,float mass_dim,float surface_dim,float cluster_rg,float primary_rg)134 float Iq(float q,
135           float mass_dim,
136           float surface_dim,
137           float cluster_rg,
138           float primary_rg)
139 {
140     return _mass_surface_fractal_kernel(q,
141             mass_dim,
142             surface_dim,
143             cluster_rg,
144             primary_rg);
145 }
146 
147 // Iqxy is never called since no orientation or magnetic parameters.
Iqxy(float qx,float qy,float mass_dim,float surface_dim,float cluster_rg,float primary_rg)148 float Iqxy(float qx, float qy,
149           float mass_dim,
150           float surface_dim,
151           float cluster_rg,
152           float primary_rg)
153 {
154     float q = sqrt(qx*qx + qy*qy);
155     return _mass_surface_fractal_kernel(q,
156            mass_dim,
157            surface_dim,
158            cluster_rg,
159            primary_rg);
160 }
161 
162 
163 
164 /*
165     ##########################################################
166     #                                                        #
167     #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
168     #   !!                                              !!   #
169     #   !!  KEEP THIS CODE CONSISTENT WITH KERNELPY.PY  !!   #
170     #   !!                                              !!   #
171     #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
172     #                                                        #
173     ##########################################################
174 */
175 
176 #ifdef IQ_KERNEL_NAME
IQ_KERNEL_NAME(global const float * q,global float * result,const int Nq,global float * loops_g,local float * loops,const float cutoff,IQ_DISPERSION_LENGTH_DECLARATIONS,IQ_FIXED_PARAMETER_DECLARATIONS)177 kernel void IQ_KERNEL_NAME(
178     global const float *q,
179     global float *result,
180     const int Nq,
181 #ifdef IQ_OPEN_LOOPS
182   #ifdef USE_OPENCL
183     global float *loops_g,
184   #endif
185     local float *loops,
186     const float cutoff,
187     IQ_DISPERSION_LENGTH_DECLARATIONS,
188 #endif
189     IQ_FIXED_PARAMETER_DECLARATIONS
190     )
191 {
192 #ifdef USE_OPENCL
193   #ifdef IQ_OPEN_LOOPS
194   // copy loops info to local memory
195   event_t e = async_work_group_copy(loops, loops_g, (IQ_DISPERSION_LENGTH_SUM)*2, 0);
196   wait_group_events(1, &e);
197   #endif
198 
199   int i = get_global_id(0);
200   if (i < Nq)
201 #else
202   #pragma omp parallel for
203   for (int i=0; i < Nq; i++)
204 #endif
205   {
206     const float qi = q[i];
207 #ifdef IQ_OPEN_LOOPS
208     float ret=0.0f, norm=0.0f;
209   #ifdef VOLUME_PARAMETERS
210     float vol=0.0f, norm_vol=0.0f;
211   #endif
212     IQ_OPEN_LOOPS
213     //for (int radius_i=0; radius_i < Nradius; radius_i++) {
214     //  const float radius = loops[2*(radius_i)];
215     //  const float radius_w = loops[2*(radius_i)+1];
216 
217     const float weight = IQ_WEIGHT_PRODUCT;
218     if (weight > cutoff) {
219       const float scattering = Iq(qi, IQ_PARAMETERS);
220       // allow kernels to exclude invalid regions by returning NaN
221       if (!isnan(scattering)) {
222         ret += weight*scattering;
223         norm += weight;
224       #ifdef VOLUME_PARAMETERS
225         const float vol_weight = VOLUME_WEIGHT_PRODUCT;
226         vol += vol_weight*form_volume(VOLUME_PARAMETERS);
227         norm_vol += vol_weight;
228       #endif
229       }
230     //else { printf("exclude qx,qy,I:%g,%g,%g\n",qi,scattering); }
231     }
232     IQ_CLOSE_LOOPS
233   #ifdef VOLUME_PARAMETERS
234     if (vol*norm_vol != 0.0f) {
235       ret *= norm_vol/vol;
236     }
237   #endif
238     result[i] = scale*ret/norm+background;
239 #else
240     result[i] = scale*Iq(qi, IQ_PARAMETERS) + background;
241 #endif
242   }
243 }
244 #endif
245 
246 
247 #ifdef IQXY_KERNEL_NAME
IQXY_KERNEL_NAME(global const float * qx,global const float * qy,global float * result,const int Nq,global float * loops_g,local float * loops,const float cutoff,IQXY_DISPERSION_LENGTH_DECLARATIONS,IQXY_FIXED_PARAMETER_DECLARATIONS)248 kernel void IQXY_KERNEL_NAME(
249     global const float *qx,
250     global const float *qy,
251     global float *result,
252     const int Nq,
253 #ifdef IQXY_OPEN_LOOPS
254   #ifdef USE_OPENCL
255     global float *loops_g,
256   #endif
257     local float *loops,
258     const float cutoff,
259     IQXY_DISPERSION_LENGTH_DECLARATIONS,
260 #endif
261     IQXY_FIXED_PARAMETER_DECLARATIONS
262     )
263 {
264 #ifdef USE_OPENCL
265   #ifdef IQXY_OPEN_LOOPS
266   // copy loops info to local memory
267   event_t e = async_work_group_copy(loops, loops_g, (IQXY_DISPERSION_LENGTH_SUM)*2, 0);
268   wait_group_events(1, &e);
269   #endif
270 
271   int i = get_global_id(0);
272   if (i < Nq)
273 #else
274   #pragma omp parallel for
275   for (int i=0; i < Nq; i++)
276 #endif
277   {
278     const float qxi = qx[i];
279     const float qyi = qy[i];
280     #if USE_KAHAN_SUMMATION
281     float accumulated_error = 0.0f;
282     #endif
283 #ifdef IQXY_OPEN_LOOPS
284     float ret=0.0f, norm=0.0f;
285     #ifdef VOLUME_PARAMETERS
286     float vol=0.0f, norm_vol=0.0f;
287     #endif
288     IQXY_OPEN_LOOPS
289     //for (int radius_i=0; radius_i < Nradius; radius_i++) {
290     //  const float radius = loops[2*(radius_i)];
291     //  const float radius_w = loops[2*(radius_i)+1];
292 
293     const float weight = IQXY_WEIGHT_PRODUCT;
294     if (weight > cutoff) {
295 
296       const float scattering = Iqxy(qxi, qyi, IQXY_PARAMETERS);
297       if (!isnan(scattering)) { // if scattering is bad, exclude it from sum
298       //if (scattering >= 0.0f) { // scattering cannot be negative
299         // TODO: use correct angle for spherical correction
300         // Definition of theta and phi are probably reversed relative to the
301         // equation which gave rise to this correction, leading to an
302         // attenuation of the pattern as theta moves through pi/2.f  Either
303         // reverse the meanings of phi and theta in the forms, or use phi
304         // rather than theta in this correction.  Current code uses cos(theta)
305         // so that values match those of sasview.
306       #if defined(IQXY_HAS_THETA) // && 0
307         const float spherical_correction
308           = (Ntheta>1 ? fabs(cos(M_PI_180*theta))*M_PI_2:1.0f);
309         const float next = spherical_correction * weight * scattering;
310       #else
311         const float next = weight * scattering;
312       #endif
313       #if USE_KAHAN_SUMMATION
314         const float y = next - accumulated_error;
315         const float t = ret + y;
316         accumulated_error = (t - ret) - y;
317         ret = t;
318       #else
319         ret += next;
320       #endif
321         norm += weight;
322       #ifdef VOLUME_PARAMETERS
323         const float vol_weight = VOLUME_WEIGHT_PRODUCT;
324         vol += vol_weight*form_volume(VOLUME_PARAMETERS);
325       #endif
326         norm_vol += vol_weight;
327       }
328       //else { printf("exclude qx,qy,I:%g,%g,%g\n",qi,scattering); }
329     }
330     IQXY_CLOSE_LOOPS
331   #ifdef VOLUME_PARAMETERS
332     if (vol*norm_vol != 0.0f) {
333       ret *= norm_vol/vol;
334     }
335   #endif
336     result[i] = scale*ret/norm+background;
337 #else
338     result[i] = scale*Iqxy(qxi, qyi, IQXY_PARAMETERS) + background;
339 #endif
340   }
341 }
342 #endif
343