1 
2 //-------------------------------------------------------------------
3 //   C-MEX implementation of SUMSKIPNAN - this function is part of the NaN-toolbox.
4 //
5 //
6 //   This program is free software; you can redistribute it and/or modify
7 //   it under the terms of the GNU General Public License as published by
8 //   the Free Software Foundation; either version 3 of the License, or
9 //   (at your option) any later version.
10 //
11 //   This program is distributed in the hope that it will be useful,
12 //   but WITHOUT ANY WARRANTY; without even the implied warranty of
13 //   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 //   GNU General Public License for more details.
15 //
16 //   You should have received a copy of the GNU General Public License
17 //   along with this program; if not, see <http://www.gnu.org/licenses/>.
18 //
19 //
20 // sumskipnan: sums all non-NaN values
21 // usage:
22 //	[o,count,SSQ] = sumskipnan_mex(x,DIM,flag,W);
23 //
24 // SUMSKIPNAN uses two techniques to reduce errors:
25 // 1) long double (80bit) instead of 64-bit double is used internally
26 // 2) The Kahan Summation formula is used to reduce the error margin from N*eps to 2*eps
27 //        The latter is only implemented in case of stride=1 (column vectors only, summation along 1st dimension).
28 //
29 // Input:
30 // - x data array
31 // - DIM (optional) dimension to sum
32 // - flag (optional) is actually an output argument telling whether some NaN was observed
33 // - W (optional) weight vector to compute weighted sum (default 1)
34 //
35 // Output:
36 // - o (weighted) sum along dimension DIM
37 // - count of valid elements
38 // - sums of squares
39 //
40 //
41 //    $Id: sumskipnan_mex.cpp 8223 2011-04-20 09:16:06Z schloegl $
42 //    Copyright (C) 2009,2010,2011 Alois Schloegl <alois.schloegl@gmail.com>
43 //    This function is part of the NaN-toolbox
44 //    http://pub.ist.ac.at/~schloegl/matlab/NaN/
45 //
46 //-------------------------------------------------------------------
47 
48 
49 
50 
51 #include <math.h>
52 #include <stdint.h>
53 #include "mex.h"
54 
55 inline int __sumskipnan2w__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W);
56 inline int __sumskipnan3w__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W);
57 inline int __sumskipnan2wr__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W);
58 inline int __sumskipnan3wr__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W);
59 inline int __sumskipnan2we__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W);
60 inline int __sumskipnan3we__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W);
61 inline int __sumskipnan2wer__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W);
62 inline int __sumskipnan3wer__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W);
63 
64 //#define NO_FLAG
65 
66 #ifdef tmwtypes_h
67   #if (MX_API_VER<=0x07020000)
68     typedef int mwSize;
69   #endif
70 #endif
71 
72 
mexFunction(int POutputCount,mxArray * POutput[],int PInputCount,const mxArray * PInputs[])73 void mexFunction(int POutputCount,  mxArray* POutput[], int PInputCount, const mxArray *PInputs[])
74 {
75     	const mwSize	*SZ;
76     	double* 	LInput;
77     	double* 	LOutputSum;
78     	double* 	LOutputCount;
79     	double* 	LOutputSum2;
80     	long double* 	LongOutputSum = NULL;
81     	long double* 	LongOutputCount = NULL;
82     	long double* 	LongOutputSum2 = NULL;
83     	double  	x;
84     	double*		W = NULL;		// weight vector
85 
86     	mwSize		DIM = 0;
87     	mwSize		D1, D2, D3; 	// NN; 	//
88     	mwSize    	ND, ND2;	// number of dimensions: input, output
89     	mwSize		ix0, ix1, ix2;	// index to input and output
90     	mwSize    	j, l;		// running indices
91     	mwSize 		*SZ2;		// size of output
92 	char	 	flag_isNaN = 0;
93 
94 	// check for proper number of input and output arguments
95 	if ((PInputCount <= 0) || (PInputCount > 4))
96 	        mexErrMsgTxt("SUMSKIPNAN.MEX requires between 1 and 4 arguments.");
97 	if (POutputCount > 4)
98 	        mexErrMsgTxt("SUMSKIPNAN.MEX has 1 to 3 output arguments.");
99 
100 	// get 1st argument
101 	if(mxIsDouble(PInputs[0]) && !mxIsComplex(PInputs[0]))
102 		LInput  = mxGetPr(PInputs[0]);
103 	else
104 		mexErrMsgTxt("First argument must be REAL/DOUBLE.");
105 
106     	// get 2nd argument
107     	if  (PInputCount > 1) {
108  	       	switch (mxGetNumberOfElements(PInputs[1])) {
109 		case 0: x = 0.0; 		// accept empty element
110 			break;
111 		case 1: x = (mxIsNumeric(PInputs[1]) ? mxGetScalar(PInputs[1]) : -1.0);
112 			break;
113 		default:x = -1.0;		// invalid
114 		}
115 		if ((x < 0) || (x > 65535) || (x != floor(x)))
116 			mexErrMsgTxt("Error SUMSKIPNAN.MEX: DIM-argument must be a positive integer scalar");
117 
118 		DIM = (unsigned)floor(x);
119 	}
120 
121 	// get size
122     	ND = mxGetNumberOfDimensions(PInputs[0]);
123     	// NN = mxGetNumberOfElements(PInputs[0]);
124     	SZ = mxGetDimensions(PInputs[0]);
125 
126 	// if DIM==0 (undefined), look for first dimension with more than 1 element.
127 	for (j = 0; (DIM < 1) && (j < ND); j++)
128 		if (SZ[j]>1) DIM = j+1;
129 
130 	if (DIM < 1) DIM=1;		// in case DIM is still undefined
131 
132 	ND2 = (ND>DIM ? ND : DIM);	// number of dimensions of output
133 
134 	SZ2 = (mwSize*)mxCalloc(ND2, sizeof(mwSize)); // allocate memory for output size
135 
136 	for (j=0; j<ND; j++)		// copy size of input;
137 		SZ2[j] = SZ[j];
138 	for (j=ND; j<ND2; j++)		// in case DIM > ND, add extra elements 1
139 		SZ2[j] = 1;
140 
141     	for (j=0, D1=1; j<DIM-1; D1=D1*SZ2[j++]); 	// D1 is the number of elements between two elements along dimension  DIM
142 	D2 = SZ2[DIM-1];		// D2 contains the size along dimension DIM
143     	for (j=DIM, D3=1;  j<ND; D3=D3*SZ2[j++]); 	// D3 is the number of blocks containing D1*D2 elements
144 
145 	SZ2[DIM-1] = 1;		// size of output is same as size of input but SZ(DIM)=1;
146 
147     	// get weight vector for weighted sumskipnan
148        	if  (PInputCount > 3)	{
149 		if (!mxGetNumberOfElements(PInputs[3]))
150 			; // empty weight vector - no weighting
151 		else if (mxGetNumberOfElements(PInputs[3])==D2)
152 			W = mxGetPr(PInputs[3]);
153 		else
154 			mexErrMsgTxt("Error SUMSKIPNAN.MEX: length of weight vector does not match size of dimension");
155 	}
156 
157 	int ACC_LEVEL = 0;
158 	{
159 		mxArray *LEVEL = NULL;
160 		int s = mexCallMATLAB(1, &LEVEL, 0, NULL, "flag_accuracy_level");
161 		if (!s) {
162 			ACC_LEVEL = (int) mxGetScalar(LEVEL);
163 			if ((D1>1) && (ACC_LEVEL>2))
164 				mexWarnMsgTxt("Warning: Kahan summation not supported with stride > 1 !");
165 		}
166 		mxDestroyArray(LEVEL);
167 	}
168 	// mexPrintf("Accuracy Level=%i\n",ACC_LEVEL);
169 
170 	    // create outputs
171 	#define TYP mxDOUBLE_CLASS
172 
173 	POutput[0] = mxCreateNumericArray(ND2, SZ2, TYP, mxREAL);
174 	LOutputSum = mxGetPr(POutput[0]);
175 	if (D1!=1 && D2>0) LongOutputSum = (long double*) mxCalloc(D1*D3,sizeof(long double));
176     	if (POutputCount >= 2) {
177 		POutput[1] = mxCreateNumericArray(ND2, SZ2, TYP, mxREAL);
178 		LOutputCount = mxGetPr(POutput[1]);
179 		if (D1!=1 && D2>0) LongOutputCount = (long double*) mxCalloc(D1*D3,sizeof(long double));
180     	}
181     	if (POutputCount >= 3) {
182 		POutput[2] = mxCreateNumericArray(ND2, SZ2, TYP, mxREAL);
183         	LOutputSum2  = mxGetPr(POutput[2]);
184 		if (D1!=1 && D2>0) LongOutputSum2 = (long double*) mxCalloc(D1*D3,sizeof(long double));
185     	}
186 	mxFree(SZ2);
187 
188 
189 	if (D1*D2*D3<1) // zero size array
190 		; 	// do nothing
191 	else if ((D1==1) && (ACC_LEVEL<1)) {
192 		// double accuray, naive summation, error = N*2^-52
193 		switch (POutputCount) {
194 		case 1:
195 			#pragma omp parallel for schedule(dynamic)
196 			for (l = 0; l<D3; l++) {
197 				double count;
198 				__sumskipnan2wr__(LInput+l*D2, D2, LOutputSum+l, &count, &flag_isNaN, W);
199 			}
200 			break;
201 		case 2:
202 			#pragma omp parallel for schedule(dynamic)
203 			for (l = 0; l<D3; l++) {
204 				__sumskipnan2wr__(LInput+l*D2, D2, LOutputSum+l, LOutputCount+l, &flag_isNaN, W);
205 			}
206 			break;
207 		case 3:
208 			#pragma omp parallel for schedule(dynamic)
209 			for (l = 0; l<D3; l++) {
210 				__sumskipnan3wr__(LInput+l*D2, D2, LOutputSum+l, LOutputSum2+l, LOutputCount+l, &flag_isNaN, W);
211 			}
212 			break;
213 		}
214 	}
215 	else if ((D1==1) && (ACC_LEVEL==1)) {
216 		// extended accuray, naive summation, error = N*2^-64
217 		switch (POutputCount) {
218 		case 1:
219 			#pragma omp parallel for schedule(dynamic)
220 			for (l = 0; l<D3; l++) {
221 				double count;
222 				__sumskipnan2w__(LInput+l*D2, D2, LOutputSum+l, &count, &flag_isNaN, W);
223 			}
224 			break;
225 		case 2:
226 			#pragma omp parallel for schedule(dynamic)
227 			for (l = 0; l<D3; l++) {
228 				__sumskipnan2w__(LInput+l*D2, D2, LOutputSum+l, LOutputCount+l, &flag_isNaN, W);
229 			}
230 			break;
231 		case 3:
232 			#pragma omp parallel for schedule(dynamic)
233 			for (l = 0; l<D3; l++) {
234 				__sumskipnan3w__(LInput+l*D2, D2, LOutputSum+l, LOutputSum2+l, LOutputCount+l, &flag_isNaN, W);
235 			}
236 			break;
237 		}
238 	}
239 	else if ((D1==1) && (ACC_LEVEL==3)) {
240 		// ACC_LEVEL==3: extended accuracy and Kahan Summation, error = 2^-64
241 		switch (POutputCount) {
242 		case 1:
243 			#pragma omp parallel for schedule(dynamic)
244 			for (l = 0; l<D3; l++) {
245 				double count;
246 				__sumskipnan2we__(LInput+l*D2, D2, LOutputSum+l, &count, &flag_isNaN, W);
247 			}
248 			break;
249 		case 2:
250 			#pragma omp parallel for schedule(dynamic)
251 			for (l = 0; l<D3; l++) {
252 				__sumskipnan2we__(LInput+l*D2, D2, LOutputSum+l, LOutputCount+l, &flag_isNaN, W);
253 			}
254 			break;
255 		case 3:
256 			#pragma omp parallel for schedule(dynamic)
257 			for (l = 0; l<D3; l++) {
258 				__sumskipnan3we__(LInput+l*D2, D2, LOutputSum+l, LOutputSum2+l, LOutputCount+l, &flag_isNaN, W);
259 			}
260 			break;
261 		}
262 	}
263 	else if ((D1==1) && (ACC_LEVEL==2)) {
264 		// ACC_LEVEL==2: double accuracy and Kahan Summation, error = 2^-52
265 		switch (POutputCount) {
266 		case 1:
267 			#pragma omp parallel for schedule(dynamic)
268 			for (l = 0; l<D3; l++) {
269 				double count;
270 				__sumskipnan2wer__(LInput+l*D2, D2, LOutputSum+l, &count, &flag_isNaN, W);
271 			}
272 			break;
273 		case 2:
274 			#pragma omp parallel for schedule(dynamic)
275 			for (l = 0; l<D3; l++) {
276 				__sumskipnan2wer__(LInput+l*D2, D2, LOutputSum+l, LOutputCount+l, &flag_isNaN, W);
277 			}
278 			break;
279 		case 3:
280 			#pragma omp parallel for schedule(dynamic)
281 			for (l = 0; l<D3; l++) {
282 				__sumskipnan3wer__(LInput+l*D2, D2, LOutputSum+l, LOutputSum2+l, LOutputCount+l, &flag_isNaN, W);
283 			}
284 			break;
285 		}
286 	}
287 	else if (POutputCount <= 1) {
288 		// OUTER LOOP: along dimensions > DIM
289  		for (l = 0; l<D3; l++) {
290 			ix0 = l*D1; 	// index for output
291 			ix1 = ix0*D2;	// index for input
292 			for (j=0; j<D2; j++) {
293 				// minimize cache misses
294 				ix2 =   ix0;	// index for output
295 				// Inner LOOP: along dimensions < DIM
296 				if (W) do {
297 					long double x = *LInput;
298         				if (!isnan(x)) {
299 						LongOutputSum[ix2]   += W[j]*x;
300 					}
301 #ifndef NO_FLAG
302 					else
303 						flag_isNaN = 1;
304 #endif
305 					LInput++;
306 					ix2++;
307 				} while (ix2 != (l+1)*D1);
308 				else do {
309 					long double x = *LInput;
310         				if (!isnan(x)) {
311 						LongOutputSum[ix2]   += x;
312 					}
313 #ifndef NO_FLAG
314 					else
315 						flag_isNaN = 1;
316 #endif
317 					LInput++;
318 					ix2++;
319 				} while (ix2 != (l+1)*D1);
320 			}	//	end for (j=
321 
322                         /* copy to output */
323                		for (j=0; j<D1; j++) {
324 				LOutputSum[ix0+j] = LongOutputSum[ix0+j];
325 			}
326                	}
327 	}
328 
329 	else if (POutputCount == 2) {
330 		// OUTER LOOP: along dimensions > DIM
331 		for (l = 0; l<D3; l++) {
332 			ix0 = l*D1;
333 			ix1 = ix0*D2;	// index for input
334 			for (j=0; j<D2; j++) {
335 				// minimize cache misses
336 				ix2 =   ix0;	// index for output
337 				// Inner LOOP: along dimensions < DIM
338 				if (W) do {
339 					long double x = *LInput;
340         				if (!isnan(x)) {
341 						LongOutputCount[ix2] += W[j];
342 						LongOutputSum[ix2]   += W[j]*x;
343 					}
344 #ifndef NO_FLAG
345 					else
346 						flag_isNaN = 1;
347 #endif
348 					LInput++;
349 					ix2++;
350 				} while (ix2 != (l+1)*D1);
351 				else do {
352 					long double x = *LInput;
353         				if (!isnan(x)) {
354 						LongOutputCount[ix2] += 1.0;
355 						LongOutputSum[ix2]   += x;
356 					}
357 #ifndef NO_FLAG
358 					else
359 						flag_isNaN = 1;
360 #endif
361 					LInput++;
362 					ix2++;
363 				} while (ix2 != (l+1)*D1);
364 			}	//	end for (j=
365 
366                                 /* copy to output */
367 	               	for (j=0; j<D1; j++) {
368 				LOutputSum[ix0+j]   = LongOutputSum[ix0+j];
369 				LOutputCount[ix0+j] = LongOutputCount[ix0+j];
370 	       		}	// 	end else
371                	}
372 	}
373 
374 	else if (POutputCount == 3) {
375 		// OUTER LOOP: along dimensions > DIM
376 		for (l = 0; l<D3; l++) {
377 			ix0 = l*D1;
378 			ix1 = ix0*D2;	// index for input
379 			for (j=0; j<D2; j++) {
380 				// minimize cache misses
381 				ix2 =   ix0;	// index for output
382 				// Inner LOOP: along dimensions < DIM
383 				if (W) do {
384 					long double x = *LInput;
385         				if (!isnan(x)) {
386 						LongOutputCount[ix2] += W[j];
387 						double t = W[j]*x;
388 						LongOutputSum[ix2]   += t;
389 						LongOutputSum2[ix2]  += x*t;
390 					}
391 #ifndef NO_FLAG
392 					else
393 						flag_isNaN = 1;
394 #endif
395 					LInput++;
396 					ix2++;
397 				} while (ix2 != (l+1)*D1);
398 				else do {
399 					long double x = *LInput;
400         				if (!isnan(x)) {
401 						LongOutputCount[ix2] += 1.0;
402 						LongOutputSum[ix2]   += x;
403 						LongOutputSum2[ix2]  += x*x;
404 					}
405 #ifndef NO_FLAG
406 					else
407 						flag_isNaN = 1;
408 #endif
409 					LInput++;
410 					ix2++;
411 				} while (ix2 != (l+1)*D1);
412 			}	//	end for (j=
413 
414                        /* copy to output */
415 	        	for (j=0; j<D1; j++) {
416 				LOutputSum[ix0+j]   = LongOutputSum[ix0+j];
417 				LOutputCount[ix0+j] = LongOutputCount[ix0+j];
418 				LOutputSum2[ix0+j]  = LongOutputSum2[ix0+j];
419 			}
420                	}
421 	}
422 
423 	if (LongOutputSum) mxFree(LongOutputSum);
424 	if (LongOutputCount) mxFree(LongOutputCount);
425 	if (LongOutputSum2) mxFree(LongOutputSum2);
426 
427 #ifndef NO_FLAG
428 	//mexPrintf("Third argument must be not empty - otherwise status whether a NaN occured or not cannot be returned.");
429 	/* this is a hack, the third input argument is used to return whether a NaN occured or not.
430 		this requires that the input argument is a non-empty variable
431 	*/
432 	if  (flag_isNaN && (PInputCount > 2) && mxGetNumberOfElements(PInputs[2])) {
433     		// set FLAG_NANS_OCCURED
434     		switch (mxGetClassID(PInputs[2])) {
435     		case mxLOGICAL_CLASS:
436     		case mxCHAR_CLASS:
437     		case mxINT8_CLASS:
438     		case mxUINT8_CLASS:
439     			*(uint8_t*)mxGetData(PInputs[2]) = 1;
440     			break;
441     		case mxDOUBLE_CLASS:
442     			*(double*)mxGetData(PInputs[2]) = 1.0;
443     			break;
444     		case mxSINGLE_CLASS:
445     			*(float*)mxGetData(PInputs[2]) = 1.0;
446     			break;
447     		case mxINT16_CLASS:
448     		case mxUINT16_CLASS:
449     			*(uint16_t*)mxGetData(PInputs[2]) = 1;
450     			break;
451     		case mxINT32_CLASS:
452     		case mxUINT32_CLASS:
453     			*(uint32_t*)mxGetData(PInputs[2])= 1;
454     			break;
455     		case mxINT64_CLASS:
456     		case mxUINT64_CLASS:
457     			*(uint64_t*)mxGetData(PInputs[2]) = 1;
458     			break;
459     		case mxFUNCTION_CLASS:
460     		case mxUNKNOWN_CLASS:
461     		case mxCELL_CLASS:
462     		case mxSTRUCT_CLASS:
463     		default:
464     			mexPrintf("Type of 3rd input argument not supported.");
465 		}
466 	}
467 #endif
468 }
469 
470 #define stride 1
__sumskipnan2w__(double * data,size_t Ni,double * s,double * No,char * flag_anyISNAN,double * W)471 inline int __sumskipnan2w__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W)
472 {
473 	long double sum=0;
474 	char   flag=0;
475 	// LOOP  along dimension DIM
476 
477 	double *end = data + stride*Ni;
478 	if (W) {
479 		// with weight vector
480 		long double count = 0.0;
481 		do {
482 			long double x = *data;
483         		if (!isnan(x))
484 			{
485 				count += *W;
486 				sum   += *W*x;
487 			}
488 #ifndef NO_FLAG
489 			else
490 				flag = 1;
491 #endif
492 
493 			data++;	// stride=1
494 			W++;
495 		}
496 		while (data < end);
497 	        *No = count;
498 	} else {
499 		// w/o weight vector
500 		size_t countI = 0;
501 		do {
502 			long double x = *data;
503         		if (!isnan(x))
504 			{
505 				countI++;
506 				sum += x;
507 			}
508 #ifndef NO_FLAG
509 			else
510 				flag = 1;
511 #endif
512 			data++;	// stride=1
513 		}
514 		while (data < end);
515 	        *No = (double)countI;
516 	}
517 
518 #ifndef NO_FLAG
519 	if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1;
520 #endif
521 	*s  = sum;
522 
523 }
524 
525 
__sumskipnan3w__(double * data,size_t Ni,double * s,double * s2,double * No,char * flag_anyISNAN,double * W)526 inline int __sumskipnan3w__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W)
527 {
528 	long double sum=0;
529 	long double msq=0;
530 	char   flag=0;
531 	// LOOP  along dimension DIM
532 
533 	double *end = data + stride*Ni;
534 	if (W) {
535 		// with weight vector
536 		long double count = 0.0;
537 		do {
538 			long double x = *data;
539         		if (!isnan(x)) {
540 				count += *W;
541 				long double t = *W*x;
542 				sum += t;
543 				msq += x*t;
544 			}
545 #ifndef NO_FLAG
546 			else
547 				flag = 1;
548 #endif
549 			data++;	// stride=1
550 			W++;
551 		}
552 		while (data < end);
553 	        *No = count;
554 	} else {
555 		// w/o weight vector
556 		size_t countI = 0;
557 		do {
558 			long double x = *data;
559         		if (!isnan(x)) {
560 				countI++;
561 				sum += x;
562 				msq += x*x;
563 			}
564 #ifndef NO_FLAG
565 			else
566 				flag = 1;
567 #endif
568 			data++;	// stride=1
569 		}
570 		while (data < end);
571 	        *No = (double)countI;
572 	}
573 
574 #ifndef NO_FLAG
575 	if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1;
576 #endif
577 	*s  = sum;
578 	*s2 = msq;
579 }
580 
__sumskipnan2wr__(double * data,size_t Ni,double * s,double * No,char * flag_anyISNAN,double * W)581 inline int __sumskipnan2wr__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W)
582 {
583 	double sum=0;
584 	char   flag=0;
585 	// LOOP  along dimension DIM
586 
587 	double *end = data + stride*Ni;
588 	if (W) {
589 		// with weight vector
590 		double count = 0.0;
591 		do {
592 			double x = *data;
593         		if (!isnan(x))
594 			{
595 				count += *W;
596 				sum   += *W*x;
597 			}
598 #ifndef NO_FLAG
599 			else
600 				flag = 1;
601 #endif
602 
603 			data++;	// stride=1
604 			W++;
605 		}
606 		while (data < end);
607 	        *No = count;
608 	} else {
609 		// w/o weight vector
610 		size_t countI = 0;
611 		do {
612 			double x = *data;
613         		if (!isnan(x))
614 			{
615 				countI++;
616 				sum += x;
617 			}
618 #ifndef NO_FLAG
619 			else
620 				flag = 1;
621 #endif
622 			data++;	// stride=1
623 		}
624 		while (data < end);
625 	        *No = (double)countI;
626 	}
627 
628 #ifndef NO_FLAG
629 	if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1;
630 #endif
631 	*s  = sum;
632 
633 }
634 
635 
__sumskipnan3wr__(double * data,size_t Ni,double * s,double * s2,double * No,char * flag_anyISNAN,double * W)636 inline int __sumskipnan3wr__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W)
637 {
638 	double sum=0;
639 	double msq=0;
640 	char   flag=0;
641 	// LOOP  along dimension DIM
642 
643 	double *end = data + stride*Ni;
644 	if (W) {
645 		// with weight vector
646 		double count = 0.0;
647 		do {
648 			double x = *data;
649         		if (!isnan(x)) {
650 				count += *W;
651 				double t = *W*x;
652 				sum += t;
653 				msq += x*t;
654 			}
655 #ifndef NO_FLAG
656 			else
657 				flag = 1;
658 #endif
659 			data++;	// stride=1
660 			W++;
661 		}
662 		while (data < end);
663 	        *No = count;
664 	} else {
665 		// w/o weight vector
666 		size_t countI = 0;
667 		do {
668 			double x = *data;
669         		if (!isnan(x)) {
670 				countI++;
671 				sum += x;
672 				msq += x*x;
673 			}
674 #ifndef NO_FLAG
675 			else
676 				flag = 1;
677 #endif
678 			data++;	// stride=1
679 		}
680 		while (data < end);
681 	        *No = (double)countI;
682 	}
683 
684 #ifndef NO_FLAG
685 	if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1;
686 #endif
687 	*s  = sum;
688 	*s2 = msq;
689 }
690 
691 
692 
693 /***************************************
694         using Kahan's summation formula [1]
695         this gives more accurate results while the computational effort within the loop is about 4x as high
696         First tests show a penalty of about 40% in terms of computational time.
697 
698         [1] David Goldberg,
699         What Every Computer Scientist Should Know About Floating-Point Arithmetic
700         ACM Computing Surveys, Vol 23, No 1, March 1991.
701  ****************************************/
702 
__sumskipnan2we__(double * data,size_t Ni,double * s,double * No,char * flag_anyISNAN,double * W)703 inline int __sumskipnan2we__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W)
704 {
705 	long double sum=0;
706 	char   flag=0;
707 	// LOOP  along dimension DIM
708 
709 	double *end = data + stride*Ni;
710 	if (W) {
711 		// with weight vector
712 		long double count = 0.0;
713 		long double rc=0.0, rn=0.0;
714 		do {
715 			long double x = *data;
716 		        long double t,y;
717         		if (!isnan(x))
718 			{
719 				//count += *W; [1]
720         			y = *W-rn;
721         			t = count+y;
722                			rn= (t-count)-y;
723 		        	count= t;
724 
725 				//sum   += *W*x; [1]
726         			y = *W*x-rc;
727         			t = sum+y;
728                			rc= (t-sum)-y;
729 		        	sum= t;
730 			}
731 #ifndef NO_FLAG
732 			else
733 				flag = 1;
734 #endif
735 
736 			data++;	// stride=1
737 			W++;
738 		}
739 		while (data < end);
740 	        *No = count;
741 	} else {
742 		// w/o weight vector
743 		size_t countI = 0;
744 		long double rc=0.0;
745 		do {
746 			long double x = *data;
747 		        long double t,y;
748         		if (!isnan(x))
749 			{
750 				countI++;
751 				// sum += x; [1]
752         			y = x-rc;
753         			t = sum+y;
754                			rc= (t-sum)-y;
755 		        	sum= t;
756 			}
757 #ifndef NO_FLAG
758 			else
759 				flag = 1;
760 #endif
761 			data++;	// stride=1
762 		}
763 		while (data < end);
764 	        *No = (double)countI;
765 	}
766 
767 #ifndef NO_FLAG
768 	if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1;
769 #endif
770 	*s  = sum;
771 
772 }
773 
774 
__sumskipnan3we__(double * data,size_t Ni,double * s,double * s2,double * No,char * flag_anyISNAN,double * W)775 inline int __sumskipnan3we__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W)
776 {
777 	long double sum=0;
778 	long double msq=0;
779 	char   flag=0;
780 	// LOOP  along dimension DIM
781 
782 	double *end = data + stride*Ni;
783 	if (W) {
784 		// with weight vector
785 		long double count = 0.0;
786         	long double rc=0.0, rn=0.0, rq=0.0;
787 		do {
788 			long double x = *data;
789 		        long double t,y;
790         		if (!isnan(x)) {
791 				//count += *W; [1]
792         			y = *W-rn;
793         			t = count+y;
794                			rn= (t-count)-y;
795 		        	count= t;
796 
797 				long double w = *W*x;
798 				//sum   += *W*x; [1]
799         			y = *W*x-rc;
800         			t = sum+y;
801                			rc= (t-sum)-y;
802 		        	sum= t;
803 
804 				// msq += x*w;
805         			y = w*x-rq;
806         			t = msq+y;
807                			rq= (t-msq)-y;
808 		        	msq= t;
809 			}
810 #ifndef NO_FLAG
811 			else
812 				flag = 1;
813 #endif
814 			data++;	// stride=1
815 			W++;
816 		}
817 		while (data < end);
818 	        *No = count;
819 	} else {
820 		// w/o weight vector
821 		size_t countI = 0;
822         	long double rc=0.0, rq=0.0;
823 		do {
824 			long double x = *data;
825 		        long double t,y;
826         		if (!isnan(x)) {
827 				countI++;
828 				//sum   += x; [1]
829         			y = x-rc;
830         			t = sum+y;
831                			rc= (t-sum)-y;
832 		        	sum= t;
833 
834 				// msq += x*x;
835         			y = x*x-rq;
836         			t = msq+y;
837                			rq= (t-msq)-y;
838 		        	msq= t;
839 			}
840 #ifndef NO_FLAG
841 			else
842 				flag = 1;
843 #endif
844 			data++;	// stride=1
845 		}
846 		while (data < end);
847 	        *No = (double)countI;
848 	}
849 
850 #ifndef NO_FLAG
851 	if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1;
852 #endif
853 	*s  = sum;
854 	*s2 = msq;
855 }
856 
__sumskipnan2wer__(double * data,size_t Ni,double * s,double * No,char * flag_anyISNAN,double * W)857 inline int __sumskipnan2wer__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W)
858 {
859 	double sum=0;
860 	char   flag=0;
861 	// LOOP  along dimension DIM
862 
863 	double *end = data + stride*Ni;
864 	if (W) {
865 		// with weight vector
866 		double count = 0.0;
867 		double rc=0.0, rn=0.0;
868 		do {
869 			double x = *data;
870 		        double t,y;
871         		if (!isnan(x))
872 			{
873 				//count += *W; [1]
874         			y = *W-rn;
875         			t = count+y;
876                			rn= (t-count)-y;
877 		        	count= t;
878 
879 				//sum   += *W*x; [1]
880         			y = *W*x-rc;
881         			t = sum+y;
882                			rc= (t-sum)-y;
883 		        	sum= t;
884 			}
885 #ifndef NO_FLAG
886 			else
887 				flag = 1;
888 #endif
889 
890 			data++;	// stride=1
891 			W++;
892 		}
893 		while (data < end);
894 	        *No = count;
895 	} else {
896 		// w/o weight vector
897 		size_t countI = 0;
898 		double rc=0.0;
899 		do {
900 			double x = *data;
901 		        double t,y;
902         		if (!isnan(x))
903 			{
904 				countI++;
905 				// sum += x; [1]
906         			y = x-rc;
907         			t = sum+y;
908                			rc= (t-sum)-y;
909 		        	sum= t;
910 			}
911 #ifndef NO_FLAG
912 			else
913 				flag = 1;
914 #endif
915 			data++;	// stride=1
916 		}
917 		while (data < end);
918 	        *No = (double)countI;
919 	}
920 
921 #ifndef NO_FLAG
922 	if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1;
923 #endif
924 	*s  = sum;
925 
926 }
927 
928 
__sumskipnan3wer__(double * data,size_t Ni,double * s,double * s2,double * No,char * flag_anyISNAN,double * W)929 inline int __sumskipnan3wer__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W)
930 {
931 	double sum=0;
932 	double msq=0;
933 	char   flag=0;
934 	// LOOP  along dimension DIM
935 
936 	double *end = data + stride*Ni;
937 	if (W) {
938 		// with weight vector
939 		double count = 0.0;
940         	double rc=0.0, rn=0.0, rq=0.0;
941 		do {
942 			double x = *data;
943 		        double t,y;
944         		if (!isnan(x)) {
945 				//count += *W; [1]
946         			y = *W-rn;
947         			t = count+y;
948                			rn= (t-count)-y;
949 		        	count= t;
950 
951 				double w = *W*x;
952 				//sum   += *W*x; [1]
953         			y = *W*x-rc;
954         			t = sum+y;
955                			rc= (t-sum)-y;
956 		        	sum= t;
957 
958 				// msq += x*w;
959         			y = w*x-rq;
960         			t = msq+y;
961                			rq= (t-msq)-y;
962 		        	msq= t;
963 			}
964 #ifndef NO_FLAG
965 			else
966 				flag = 1;
967 #endif
968 			data++;	// stride=1
969 			W++;
970 		}
971 		while (data < end);
972 	        *No = count;
973 	} else {
974 		// w/o weight vector
975 		size_t countI = 0;
976         	double rc=0.0, rq=0.0;
977 		do {
978 			double x = *data;
979 		        double t,y;
980         		if (!isnan(x)) {
981 				countI++;
982 				//sum   += x; [1]
983         			y = x-rc;
984         			t = sum+y;
985                			rc= (t-sum)-y;
986 		        	sum= t;
987 
988 				// msq += x*x;
989         			y = x*x-rq;
990         			t = msq+y;
991                			rq= (t-msq)-y;
992 		        	msq= t;
993 			}
994 #ifndef NO_FLAG
995 			else
996 				flag = 1;
997 #endif
998 			data++;	// stride=1
999 		}
1000 		while (data < end);
1001 	        *No = (double)countI;
1002 	}
1003 
1004 #ifndef NO_FLAG
1005 	if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1;
1006 #endif
1007 	*s  = sum;
1008 	*s2 = msq;
1009 }
1010 
1011