1 // GENERATED CODE --- DO NOT EDIT ---
2 // Code is produced by sasmodels.gen from sasmodels/models/MODEL.c
3 
4 #ifdef __OPENCL_VERSION__
5 # define USE_OPENCL
6 #endif
7 
8 #define USE_KAHAN_SUMMATION 0
9 
10 // If opencl is not available, then we are compiling a C function
11 // Note: if using a C++ compiler, then define kernel as extern "C"
12 #ifndef USE_OPENCL
13 #  ifdef __cplusplus
14      #include <cstdio>
15      #include <cmath>
16      using namespace std;
17      #if defined(_MSC_VER)
18      #   define kernel extern "C" __declspec( dllexport )
trunc(float x)19          inline float trunc(float x) { return x>=0?floor(x):-floor(-x); }
fmin(float x,float y)20 	 inline float fmin(float x, float y) { return x>y ? y : x; }
fmax(float x,float y)21 	 inline float fmax(float x, float y) { return x<y ? y : x; }
22      #else
23      #   define kernel extern "C"
24      #endif
SINCOS(float angle,float & svar,float & cvar)25      inline void SINCOS(float angle, float &svar, float &cvar) { svar=sin(angle); cvar=cos(angle); }
26 #  else
27      #include <stdio.h>
28      #include <tgmath.h> // C99 type-generic math, so sin(float) => sinf
29      // MSVC doesn't support C99, so no need for dllexport on C99 branch
30      #define kernel
31      #define SINCOS(angle,svar,cvar) do {const float _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0)
32 #  endif
33 #  define global
34 #  define local
35 #  define constant const
36 // OpenCL powr(a,b) = C99 pow(a,b), b >= 0
37 // OpenCL pown(a,b) = C99 pow(a,b), b integer
38 #  define powr(a,b) pow(a,b)
39 #  define pown(a,b) pow(a,b)
40 #else
41 #  ifdef USE_SINCOS
42 #    define SINCOS(angle,svar,cvar) svar=sincos(angle,&cvar)
43 #  else
44 #    define SINCOS(angle,svar,cvar) do {const float _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0)
45 #  endif
46 #endif
47 
48 // Standard mathematical constants:
49 //   M_E, M_LOG2E, M_LOG10E, M_LN2, M_LN10, M_PI, M_PI_2=pi/2, M_PI_4=pi/4,
50 //   M_1_PI=1/pi, M_2_PI=2/pi, M_2_SQRTPI=2/sqrt(pi), SQRT2, SQRT1_2=sqrt(1/2)
51 // OpenCL defines M_constant_F for float constants, and nothing if float
52 // is not enabled on the card, which is why these constants may be missing
53 #ifndef M_PI
54 #  define M_PI 3.141592653589793f
55 #endif
56 #ifndef M_PI_2
57 #  define M_PI_2 1.570796326794897f
58 #endif
59 #ifndef M_PI_4
60 #  define M_PI_4 0.7853981633974483f
61 #endif
62 
63 // Non-standard pi/180, used for converting between degrees and radians
64 #ifndef M_PI_180
65 #  define M_PI_180 0.017453292519943295f
66 #endif
67 
68 
69 #define VOLUME_PARAMETERS effect_radius
70 #define VOLUME_WEIGHT_PRODUCT effect_radius_w
71 #define VOLUME_PARAMETER_DECLARATIONS float effect_radius
72 #define IQ_KERNEL_NAME hardsphere_Iq
73 #define IQ_PARAMETERS effect_radius, volfraction
74 #define IQ_FIXED_PARAMETER_DECLARATIONS const float scale, \
75     const float background, \
76     const float volfraction
77 #define IQ_WEIGHT_PRODUCT effect_radius_w
78 #define IQ_DISPERSION_LENGTH_DECLARATIONS const int Neffect_radius
79 #define IQ_DISPERSION_LENGTH_SUM Neffect_radius
80 #define IQ_OPEN_LOOPS     for (int effect_radius_i=0; effect_radius_i < Neffect_radius; effect_radius_i++) { \
81       const float effect_radius = loops[2*(effect_radius_i)]; \
82       const float effect_radius_w = loops[2*(effect_radius_i)+1];
83 #define IQ_CLOSE_LOOPS     }
84 #define IQ_PARAMETER_DECLARATIONS float effect_radius, float volfraction
85 #define IQXY_KERNEL_NAME hardsphere_Iqxy
86 #define IQXY_PARAMETERS effect_radius, volfraction
87 #define IQXY_FIXED_PARAMETER_DECLARATIONS const float scale, \
88     const float background, \
89     const float volfraction
90 #define IQXY_WEIGHT_PRODUCT effect_radius_w
91 #define IQXY_DISPERSION_LENGTH_DECLARATIONS const int Neffect_radius
92 #define IQXY_DISPERSION_LENGTH_SUM Neffect_radius
93 #define IQXY_OPEN_LOOPS     for (int effect_radius_i=0; effect_radius_i < Neffect_radius; effect_radius_i++) { \
94       const float effect_radius = loops[2*(effect_radius_i)]; \
95       const float effect_radius_w = loops[2*(effect_radius_i)+1];
96 #define IQXY_CLOSE_LOOPS     }
97 #define IQXY_PARAMETER_DECLARATIONS float effect_radius, float volfraction
98 
99 float form_volume(VOLUME_PARAMETER_DECLARATIONS);
form_volume(VOLUME_PARAMETER_DECLARATIONS)100 float form_volume(VOLUME_PARAMETER_DECLARATIONS) {
101 
102     return 1.0f;
103 
104 }
105 
106 
107 float Iq(float q, IQ_PARAMETER_DECLARATIONS);
Iq(float q,IQ_PARAMETER_DECLARATIONS)108 float Iq(float q, IQ_PARAMETER_DECLARATIONS) {
109 
110     float denom,dnum,alpha,beta,gamm,a,asq,ath,afor,rca,rsa;
111     float calp,cbeta,cgam,prefac,c,vstruc;
112     float struc;
113 
114     //  compute constants
115     denom = pow((1.0f-volfraction),4);
116     dnum = pow((1.0f + 2.0f*volfraction),2);
117     alpha = dnum/denom;
118     beta = -6.0f*volfraction*pow((1.0f + volfraction/2.0f),2)/denom;
119     gamm = 0.50f*volfraction*dnum/denom;
120     //
121     //  calculate the structure factor
122     //
123     a = 2.0f*q*effect_radius;
124     asq = a*a;
125     ath = asq*a;
126     afor = ath*a;
127     SINCOS(a,rsa,rca);
128     //rca = cos(a);
129     //rsa = sin(a);
130     calp = alpha*(rsa/asq - rca/a);
131     cbeta = beta*(2.0f*rsa/asq - (asq - 2.0f)*rca/ath - 2.0f/ath);
132     cgam = gamm*(-rca/a + (4.0f/a)*((3.0f*asq - 6.0f)*rca/afor + (asq - 6.0f)*rsa/ath + 6.0f/afor));
133     prefac = -24.0f*volfraction/a;
134     c = prefac*(calp + cbeta + cgam);
135     vstruc = 1.0f/(1.0f-c);
136     struc = vstruc;
137 
138     return(struc);
139 
140 }
141 
142 
143 float Iqxy(float qx, float qy, IQXY_PARAMETER_DECLARATIONS);
Iqxy(float qx,float qy,IQXY_PARAMETER_DECLARATIONS)144 float Iqxy(float qx, float qy, IQXY_PARAMETER_DECLARATIONS) {
145 
146     // never called since no orientation or magnetic parameters.
147     return Iq(sqrt(qx*qx+qy*qy), IQ_PARAMETERS);
148 
149 }
150 
151 
152 /*
153     ##########################################################
154     #                                                        #
155     #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
156     #   !!                                              !!   #
157     #   !!  KEEP THIS CODE CONSISTENT WITH KERNELPY.PY  !!   #
158     #   !!                                              !!   #
159     #   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   #
160     #                                                        #
161     ##########################################################
162 */
163 
164 #ifdef IQ_KERNEL_NAME
IQ_KERNEL_NAME(global const float * q,global float * result,const int Nq,global float * loops_g,local float * loops,const float cutoff,IQ_DISPERSION_LENGTH_DECLARATIONS,IQ_FIXED_PARAMETER_DECLARATIONS)165 kernel void IQ_KERNEL_NAME(
166     global const float *q,
167     global float *result,
168     const int Nq,
169 #ifdef IQ_OPEN_LOOPS
170   #ifdef USE_OPENCL
171     global float *loops_g,
172   #endif
173     local float *loops,
174     const float cutoff,
175     IQ_DISPERSION_LENGTH_DECLARATIONS,
176 #endif
177     IQ_FIXED_PARAMETER_DECLARATIONS
178     )
179 {
180 #ifdef USE_OPENCL
181   #ifdef IQ_OPEN_LOOPS
182   // copy loops info to local memory
183   event_t e = async_work_group_copy(loops, loops_g, (IQ_DISPERSION_LENGTH_SUM)*2, 0);
184   wait_group_events(1, &e);
185   #endif
186 
187   int i = get_global_id(0);
188   if (i < Nq)
189 #else
190   #pragma omp parallel for
191   for (int i=0; i < Nq; i++)
192 #endif
193   {
194     const float qi = q[i];
195 #ifdef IQ_OPEN_LOOPS
196     float ret=0.0f, norm=0.0f;
197   #ifdef VOLUME_PARAMETERS
198     float vol=0.0f, norm_vol=0.0f;
199   #endif
200     IQ_OPEN_LOOPS
201     //for (int radius_i=0; radius_i < Nradius; radius_i++) {
202     //  const float radius = loops[2*(radius_i)];
203     //  const float radius_w = loops[2*(radius_i)+1];
204 
205     const float weight = IQ_WEIGHT_PRODUCT;
206     if (weight > cutoff) {
207       const float scattering = Iq(qi, IQ_PARAMETERS);
208       // allow kernels to exclude invalid regions by returning NaN
209       if (!isnan(scattering)) {
210         ret += weight*scattering;
211         norm += weight;
212       #ifdef VOLUME_PARAMETERS
213         const float vol_weight = VOLUME_WEIGHT_PRODUCT;
214         vol += vol_weight*form_volume(VOLUME_PARAMETERS);
215         norm_vol += vol_weight;
216       #endif
217       }
218     //else { printf("exclude qx,qy,I:%g,%g,%g\n",qi,scattering); }
219     }
220     IQ_CLOSE_LOOPS
221   #ifdef VOLUME_PARAMETERS
222     if (vol*norm_vol != 0.0f) {
223       ret *= norm_vol/vol;
224     }
225   #endif
226     result[i] = scale*ret/norm+background;
227 #else
228     result[i] = scale*Iq(qi, IQ_PARAMETERS) + background;
229 #endif
230   }
231 }
232 #endif
233 
234 
235 #ifdef IQXY_KERNEL_NAME
IQXY_KERNEL_NAME(global const float * qx,global const float * qy,global float * result,const int Nq,global float * loops_g,local float * loops,const float cutoff,IQXY_DISPERSION_LENGTH_DECLARATIONS,IQXY_FIXED_PARAMETER_DECLARATIONS)236 kernel void IQXY_KERNEL_NAME(
237     global const float *qx,
238     global const float *qy,
239     global float *result,
240     const int Nq,
241 #ifdef IQXY_OPEN_LOOPS
242   #ifdef USE_OPENCL
243     global float *loops_g,
244   #endif
245     local float *loops,
246     const float cutoff,
247     IQXY_DISPERSION_LENGTH_DECLARATIONS,
248 #endif
249     IQXY_FIXED_PARAMETER_DECLARATIONS
250     )
251 {
252 #ifdef USE_OPENCL
253   #ifdef IQXY_OPEN_LOOPS
254   // copy loops info to local memory
255   event_t e = async_work_group_copy(loops, loops_g, (IQXY_DISPERSION_LENGTH_SUM)*2, 0);
256   wait_group_events(1, &e);
257   #endif
258 
259   int i = get_global_id(0);
260   if (i < Nq)
261 #else
262   #pragma omp parallel for
263   for (int i=0; i < Nq; i++)
264 #endif
265   {
266     const float qxi = qx[i];
267     const float qyi = qy[i];
268     #if USE_KAHAN_SUMMATION
269     float accumulated_error = 0.0f;
270     #endif
271 #ifdef IQXY_OPEN_LOOPS
272     float ret=0.0f, norm=0.0f;
273     #ifdef VOLUME_PARAMETERS
274     float vol=0.0f, norm_vol=0.0f;
275     #endif
276     IQXY_OPEN_LOOPS
277     //for (int radius_i=0; radius_i < Nradius; radius_i++) {
278     //  const float radius = loops[2*(radius_i)];
279     //  const float radius_w = loops[2*(radius_i)+1];
280 
281     const float weight = IQXY_WEIGHT_PRODUCT;
282     if (weight > cutoff) {
283 
284       const float scattering = Iqxy(qxi, qyi, IQXY_PARAMETERS);
285       if (!isnan(scattering)) { // if scattering is bad, exclude it from sum
286       //if (scattering >= 0.0f) { // scattering cannot be negative
287         // TODO: use correct angle for spherical correction
288         // Definition of theta and phi are probably reversed relative to the
289         // equation which gave rise to this correction, leading to an
290         // attenuation of the pattern as theta moves through pi/2.f  Either
291         // reverse the meanings of phi and theta in the forms, or use phi
292         // rather than theta in this correction.  Current code uses cos(theta)
293         // so that values match those of sasview.
294       #if defined(IQXY_HAS_THETA) // && 0
295         const float spherical_correction
296           = (Ntheta>1 ? fabs(cos(M_PI_180*theta))*M_PI_2:1.0f);
297         const float next = spherical_correction * weight * scattering;
298       #else
299         const float next = weight * scattering;
300       #endif
301       #if USE_KAHAN_SUMMATION
302         const float y = next - accumulated_error;
303         const float t = ret + y;
304         accumulated_error = (t - ret) - y;
305         ret = t;
306       #else
307         ret += next;
308       #endif
309         norm += weight;
310       #ifdef VOLUME_PARAMETERS
311         const float vol_weight = VOLUME_WEIGHT_PRODUCT;
312         vol += vol_weight*form_volume(VOLUME_PARAMETERS);
313       #endif
314         norm_vol += vol_weight;
315       }
316       //else { printf("exclude qx,qy,I:%g,%g,%g\n",qi,scattering); }
317     }
318     IQXY_CLOSE_LOOPS
319   #ifdef VOLUME_PARAMETERS
320     if (vol*norm_vol != 0.0f) {
321       ret *= norm_vol/vol;
322     }
323   #endif
324     result[i] = scale*ret/norm+background;
325 #else
326     result[i] = scale*Iqxy(qxi, qyi, IQXY_PARAMETERS) + background;
327 #endif
328   }
329 }
330 #endif
331