1
2 //-------------------------------------------------------------------
3 // C-MEX implementation of SUMSKIPNAN - this function is part of the NaN-toolbox.
4 //
5 //
6 // This program is free software; you can redistribute it and/or modify
7 // it under the terms of the GNU General Public License as published by
8 // the Free Software Foundation; either version 3 of the License, or
9 // (at your option) any later version.
10 //
11 // This program is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 //
16 // You should have received a copy of the GNU General Public License
17 // along with this program; if not, see <http://www.gnu.org/licenses/>.
18 //
19 //
20 // sumskipnan: sums all non-NaN values
21 // usage:
22 // [o,count,SSQ] = sumskipnan_mex(x,DIM,flag,W);
23 //
24 // SUMSKIPNAN uses two techniques to reduce errors:
25 // 1) long double (80bit) instead of 64-bit double is used internally
26 // 2) The Kahan Summation formula is used to reduce the error margin from N*eps to 2*eps
27 // The latter is only implemented in case of stride=1 (column vectors only, summation along 1st dimension).
28 //
29 // Input:
30 // - x data array
31 // - DIM (optional) dimension to sum
32 // - flag (optional) is actually an output argument telling whether some NaN was observed
33 // - W (optional) weight vector to compute weighted sum (default 1)
34 //
35 // Output:
36 // - o (weighted) sum along dimension DIM
37 // - count of valid elements
38 // - sums of squares
39 //
40 //
41 // $Id: sumskipnan_mex.cpp 8223 2011-04-20 09:16:06Z schloegl $
42 // Copyright (C) 2009,2010,2011 Alois Schloegl <alois.schloegl@gmail.com>
43 // This function is part of the NaN-toolbox
44 // http://pub.ist.ac.at/~schloegl/matlab/NaN/
45 //
46 //-------------------------------------------------------------------
47
48
49
50
51 #include <math.h>
52 #include <stdint.h>
53 #include "mex.h"
54
55 inline int __sumskipnan2w__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W);
56 inline int __sumskipnan3w__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W);
57 inline int __sumskipnan2wr__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W);
58 inline int __sumskipnan3wr__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W);
59 inline int __sumskipnan2we__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W);
60 inline int __sumskipnan3we__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W);
61 inline int __sumskipnan2wer__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W);
62 inline int __sumskipnan3wer__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W);
63
64 //#define NO_FLAG
65
66 #ifdef tmwtypes_h
67 #if (MX_API_VER<=0x07020000)
68 typedef int mwSize;
69 #endif
70 #endif
71
72
mexFunction(int POutputCount,mxArray * POutput[],int PInputCount,const mxArray * PInputs[])73 void mexFunction(int POutputCount, mxArray* POutput[], int PInputCount, const mxArray *PInputs[])
74 {
75 const mwSize *SZ;
76 double* LInput;
77 double* LOutputSum;
78 double* LOutputCount;
79 double* LOutputSum2;
80 long double* LongOutputSum = NULL;
81 long double* LongOutputCount = NULL;
82 long double* LongOutputSum2 = NULL;
83 double x;
84 double* W = NULL; // weight vector
85
86 mwSize DIM = 0;
87 mwSize D1, D2, D3; // NN; //
88 mwSize ND, ND2; // number of dimensions: input, output
89 mwSize ix0, ix1, ix2; // index to input and output
90 mwSize j, l; // running indices
91 mwSize *SZ2; // size of output
92 char flag_isNaN = 0;
93
94 // check for proper number of input and output arguments
95 if ((PInputCount <= 0) || (PInputCount > 4))
96 mexErrMsgTxt("SUMSKIPNAN.MEX requires between 1 and 4 arguments.");
97 if (POutputCount > 4)
98 mexErrMsgTxt("SUMSKIPNAN.MEX has 1 to 3 output arguments.");
99
100 // get 1st argument
101 if(mxIsDouble(PInputs[0]) && !mxIsComplex(PInputs[0]))
102 LInput = mxGetPr(PInputs[0]);
103 else
104 mexErrMsgTxt("First argument must be REAL/DOUBLE.");
105
106 // get 2nd argument
107 if (PInputCount > 1) {
108 switch (mxGetNumberOfElements(PInputs[1])) {
109 case 0: x = 0.0; // accept empty element
110 break;
111 case 1: x = (mxIsNumeric(PInputs[1]) ? mxGetScalar(PInputs[1]) : -1.0);
112 break;
113 default:x = -1.0; // invalid
114 }
115 if ((x < 0) || (x > 65535) || (x != floor(x)))
116 mexErrMsgTxt("Error SUMSKIPNAN.MEX: DIM-argument must be a positive integer scalar");
117
118 DIM = (unsigned)floor(x);
119 }
120
121 // get size
122 ND = mxGetNumberOfDimensions(PInputs[0]);
123 // NN = mxGetNumberOfElements(PInputs[0]);
124 SZ = mxGetDimensions(PInputs[0]);
125
126 // if DIM==0 (undefined), look for first dimension with more than 1 element.
127 for (j = 0; (DIM < 1) && (j < ND); j++)
128 if (SZ[j]>1) DIM = j+1;
129
130 if (DIM < 1) DIM=1; // in case DIM is still undefined
131
132 ND2 = (ND>DIM ? ND : DIM); // number of dimensions of output
133
134 SZ2 = (mwSize*)mxCalloc(ND2, sizeof(mwSize)); // allocate memory for output size
135
136 for (j=0; j<ND; j++) // copy size of input;
137 SZ2[j] = SZ[j];
138 for (j=ND; j<ND2; j++) // in case DIM > ND, add extra elements 1
139 SZ2[j] = 1;
140
141 for (j=0, D1=1; j<DIM-1; D1=D1*SZ2[j++]); // D1 is the number of elements between two elements along dimension DIM
142 D2 = SZ2[DIM-1]; // D2 contains the size along dimension DIM
143 for (j=DIM, D3=1; j<ND; D3=D3*SZ2[j++]); // D3 is the number of blocks containing D1*D2 elements
144
145 SZ2[DIM-1] = 1; // size of output is same as size of input but SZ(DIM)=1;
146
147 // get weight vector for weighted sumskipnan
148 if (PInputCount > 3) {
149 if (!mxGetNumberOfElements(PInputs[3]))
150 ; // empty weight vector - no weighting
151 else if (mxGetNumberOfElements(PInputs[3])==D2)
152 W = mxGetPr(PInputs[3]);
153 else
154 mexErrMsgTxt("Error SUMSKIPNAN.MEX: length of weight vector does not match size of dimension");
155 }
156
157 int ACC_LEVEL = 0;
158 {
159 mxArray *LEVEL = NULL;
160 int s = mexCallMATLAB(1, &LEVEL, 0, NULL, "flag_accuracy_level");
161 if (!s) {
162 ACC_LEVEL = (int) mxGetScalar(LEVEL);
163 if ((D1>1) && (ACC_LEVEL>2))
164 mexWarnMsgTxt("Warning: Kahan summation not supported with stride > 1 !");
165 }
166 mxDestroyArray(LEVEL);
167 }
168 // mexPrintf("Accuracy Level=%i\n",ACC_LEVEL);
169
170 // create outputs
171 #define TYP mxDOUBLE_CLASS
172
173 POutput[0] = mxCreateNumericArray(ND2, SZ2, TYP, mxREAL);
174 LOutputSum = mxGetPr(POutput[0]);
175 if (D1!=1 && D2>0) LongOutputSum = (long double*) mxCalloc(D1*D3,sizeof(long double));
176 if (POutputCount >= 2) {
177 POutput[1] = mxCreateNumericArray(ND2, SZ2, TYP, mxREAL);
178 LOutputCount = mxGetPr(POutput[1]);
179 if (D1!=1 && D2>0) LongOutputCount = (long double*) mxCalloc(D1*D3,sizeof(long double));
180 }
181 if (POutputCount >= 3) {
182 POutput[2] = mxCreateNumericArray(ND2, SZ2, TYP, mxREAL);
183 LOutputSum2 = mxGetPr(POutput[2]);
184 if (D1!=1 && D2>0) LongOutputSum2 = (long double*) mxCalloc(D1*D3,sizeof(long double));
185 }
186 mxFree(SZ2);
187
188
189 if (D1*D2*D3<1) // zero size array
190 ; // do nothing
191 else if ((D1==1) && (ACC_LEVEL<1)) {
192 // double accuray, naive summation, error = N*2^-52
193 switch (POutputCount) {
194 case 1:
195 #pragma omp parallel for schedule(dynamic)
196 for (l = 0; l<D3; l++) {
197 double count;
198 __sumskipnan2wr__(LInput+l*D2, D2, LOutputSum+l, &count, &flag_isNaN, W);
199 }
200 break;
201 case 2:
202 #pragma omp parallel for schedule(dynamic)
203 for (l = 0; l<D3; l++) {
204 __sumskipnan2wr__(LInput+l*D2, D2, LOutputSum+l, LOutputCount+l, &flag_isNaN, W);
205 }
206 break;
207 case 3:
208 #pragma omp parallel for schedule(dynamic)
209 for (l = 0; l<D3; l++) {
210 __sumskipnan3wr__(LInput+l*D2, D2, LOutputSum+l, LOutputSum2+l, LOutputCount+l, &flag_isNaN, W);
211 }
212 break;
213 }
214 }
215 else if ((D1==1) && (ACC_LEVEL==1)) {
216 // extended accuray, naive summation, error = N*2^-64
217 switch (POutputCount) {
218 case 1:
219 #pragma omp parallel for schedule(dynamic)
220 for (l = 0; l<D3; l++) {
221 double count;
222 __sumskipnan2w__(LInput+l*D2, D2, LOutputSum+l, &count, &flag_isNaN, W);
223 }
224 break;
225 case 2:
226 #pragma omp parallel for schedule(dynamic)
227 for (l = 0; l<D3; l++) {
228 __sumskipnan2w__(LInput+l*D2, D2, LOutputSum+l, LOutputCount+l, &flag_isNaN, W);
229 }
230 break;
231 case 3:
232 #pragma omp parallel for schedule(dynamic)
233 for (l = 0; l<D3; l++) {
234 __sumskipnan3w__(LInput+l*D2, D2, LOutputSum+l, LOutputSum2+l, LOutputCount+l, &flag_isNaN, W);
235 }
236 break;
237 }
238 }
239 else if ((D1==1) && (ACC_LEVEL==3)) {
240 // ACC_LEVEL==3: extended accuracy and Kahan Summation, error = 2^-64
241 switch (POutputCount) {
242 case 1:
243 #pragma omp parallel for schedule(dynamic)
244 for (l = 0; l<D3; l++) {
245 double count;
246 __sumskipnan2we__(LInput+l*D2, D2, LOutputSum+l, &count, &flag_isNaN, W);
247 }
248 break;
249 case 2:
250 #pragma omp parallel for schedule(dynamic)
251 for (l = 0; l<D3; l++) {
252 __sumskipnan2we__(LInput+l*D2, D2, LOutputSum+l, LOutputCount+l, &flag_isNaN, W);
253 }
254 break;
255 case 3:
256 #pragma omp parallel for schedule(dynamic)
257 for (l = 0; l<D3; l++) {
258 __sumskipnan3we__(LInput+l*D2, D2, LOutputSum+l, LOutputSum2+l, LOutputCount+l, &flag_isNaN, W);
259 }
260 break;
261 }
262 }
263 else if ((D1==1) && (ACC_LEVEL==2)) {
264 // ACC_LEVEL==2: double accuracy and Kahan Summation, error = 2^-52
265 switch (POutputCount) {
266 case 1:
267 #pragma omp parallel for schedule(dynamic)
268 for (l = 0; l<D3; l++) {
269 double count;
270 __sumskipnan2wer__(LInput+l*D2, D2, LOutputSum+l, &count, &flag_isNaN, W);
271 }
272 break;
273 case 2:
274 #pragma omp parallel for schedule(dynamic)
275 for (l = 0; l<D3; l++) {
276 __sumskipnan2wer__(LInput+l*D2, D2, LOutputSum+l, LOutputCount+l, &flag_isNaN, W);
277 }
278 break;
279 case 3:
280 #pragma omp parallel for schedule(dynamic)
281 for (l = 0; l<D3; l++) {
282 __sumskipnan3wer__(LInput+l*D2, D2, LOutputSum+l, LOutputSum2+l, LOutputCount+l, &flag_isNaN, W);
283 }
284 break;
285 }
286 }
287 else if (POutputCount <= 1) {
288 // OUTER LOOP: along dimensions > DIM
289 for (l = 0; l<D3; l++) {
290 ix0 = l*D1; // index for output
291 ix1 = ix0*D2; // index for input
292 for (j=0; j<D2; j++) {
293 // minimize cache misses
294 ix2 = ix0; // index for output
295 // Inner LOOP: along dimensions < DIM
296 if (W) do {
297 long double x = *LInput;
298 if (!isnan(x)) {
299 LongOutputSum[ix2] += W[j]*x;
300 }
301 #ifndef NO_FLAG
302 else
303 flag_isNaN = 1;
304 #endif
305 LInput++;
306 ix2++;
307 } while (ix2 != (l+1)*D1);
308 else do {
309 long double x = *LInput;
310 if (!isnan(x)) {
311 LongOutputSum[ix2] += x;
312 }
313 #ifndef NO_FLAG
314 else
315 flag_isNaN = 1;
316 #endif
317 LInput++;
318 ix2++;
319 } while (ix2 != (l+1)*D1);
320 } // end for (j=
321
322 /* copy to output */
323 for (j=0; j<D1; j++) {
324 LOutputSum[ix0+j] = LongOutputSum[ix0+j];
325 }
326 }
327 }
328
329 else if (POutputCount == 2) {
330 // OUTER LOOP: along dimensions > DIM
331 for (l = 0; l<D3; l++) {
332 ix0 = l*D1;
333 ix1 = ix0*D2; // index for input
334 for (j=0; j<D2; j++) {
335 // minimize cache misses
336 ix2 = ix0; // index for output
337 // Inner LOOP: along dimensions < DIM
338 if (W) do {
339 long double x = *LInput;
340 if (!isnan(x)) {
341 LongOutputCount[ix2] += W[j];
342 LongOutputSum[ix2] += W[j]*x;
343 }
344 #ifndef NO_FLAG
345 else
346 flag_isNaN = 1;
347 #endif
348 LInput++;
349 ix2++;
350 } while (ix2 != (l+1)*D1);
351 else do {
352 long double x = *LInput;
353 if (!isnan(x)) {
354 LongOutputCount[ix2] += 1.0;
355 LongOutputSum[ix2] += x;
356 }
357 #ifndef NO_FLAG
358 else
359 flag_isNaN = 1;
360 #endif
361 LInput++;
362 ix2++;
363 } while (ix2 != (l+1)*D1);
364 } // end for (j=
365
366 /* copy to output */
367 for (j=0; j<D1; j++) {
368 LOutputSum[ix0+j] = LongOutputSum[ix0+j];
369 LOutputCount[ix0+j] = LongOutputCount[ix0+j];
370 } // end else
371 }
372 }
373
374 else if (POutputCount == 3) {
375 // OUTER LOOP: along dimensions > DIM
376 for (l = 0; l<D3; l++) {
377 ix0 = l*D1;
378 ix1 = ix0*D2; // index for input
379 for (j=0; j<D2; j++) {
380 // minimize cache misses
381 ix2 = ix0; // index for output
382 // Inner LOOP: along dimensions < DIM
383 if (W) do {
384 long double x = *LInput;
385 if (!isnan(x)) {
386 LongOutputCount[ix2] += W[j];
387 double t = W[j]*x;
388 LongOutputSum[ix2] += t;
389 LongOutputSum2[ix2] += x*t;
390 }
391 #ifndef NO_FLAG
392 else
393 flag_isNaN = 1;
394 #endif
395 LInput++;
396 ix2++;
397 } while (ix2 != (l+1)*D1);
398 else do {
399 long double x = *LInput;
400 if (!isnan(x)) {
401 LongOutputCount[ix2] += 1.0;
402 LongOutputSum[ix2] += x;
403 LongOutputSum2[ix2] += x*x;
404 }
405 #ifndef NO_FLAG
406 else
407 flag_isNaN = 1;
408 #endif
409 LInput++;
410 ix2++;
411 } while (ix2 != (l+1)*D1);
412 } // end for (j=
413
414 /* copy to output */
415 for (j=0; j<D1; j++) {
416 LOutputSum[ix0+j] = LongOutputSum[ix0+j];
417 LOutputCount[ix0+j] = LongOutputCount[ix0+j];
418 LOutputSum2[ix0+j] = LongOutputSum2[ix0+j];
419 }
420 }
421 }
422
423 if (LongOutputSum) mxFree(LongOutputSum);
424 if (LongOutputCount) mxFree(LongOutputCount);
425 if (LongOutputSum2) mxFree(LongOutputSum2);
426
427 #ifndef NO_FLAG
428 //mexPrintf("Third argument must be not empty - otherwise status whether a NaN occured or not cannot be returned.");
429 /* this is a hack, the third input argument is used to return whether a NaN occured or not.
430 this requires that the input argument is a non-empty variable
431 */
432 if (flag_isNaN && (PInputCount > 2) && mxGetNumberOfElements(PInputs[2])) {
433 // set FLAG_NANS_OCCURED
434 switch (mxGetClassID(PInputs[2])) {
435 case mxLOGICAL_CLASS:
436 case mxCHAR_CLASS:
437 case mxINT8_CLASS:
438 case mxUINT8_CLASS:
439 *(uint8_t*)mxGetData(PInputs[2]) = 1;
440 break;
441 case mxDOUBLE_CLASS:
442 *(double*)mxGetData(PInputs[2]) = 1.0;
443 break;
444 case mxSINGLE_CLASS:
445 *(float*)mxGetData(PInputs[2]) = 1.0;
446 break;
447 case mxINT16_CLASS:
448 case mxUINT16_CLASS:
449 *(uint16_t*)mxGetData(PInputs[2]) = 1;
450 break;
451 case mxINT32_CLASS:
452 case mxUINT32_CLASS:
453 *(uint32_t*)mxGetData(PInputs[2])= 1;
454 break;
455 case mxINT64_CLASS:
456 case mxUINT64_CLASS:
457 *(uint64_t*)mxGetData(PInputs[2]) = 1;
458 break;
459 case mxFUNCTION_CLASS:
460 case mxUNKNOWN_CLASS:
461 case mxCELL_CLASS:
462 case mxSTRUCT_CLASS:
463 default:
464 mexPrintf("Type of 3rd input argument not supported.");
465 }
466 }
467 #endif
468 }
469
470 #define stride 1
__sumskipnan2w__(double * data,size_t Ni,double * s,double * No,char * flag_anyISNAN,double * W)471 inline int __sumskipnan2w__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W)
472 {
473 long double sum=0;
474 char flag=0;
475 // LOOP along dimension DIM
476
477 double *end = data + stride*Ni;
478 if (W) {
479 // with weight vector
480 long double count = 0.0;
481 do {
482 long double x = *data;
483 if (!isnan(x))
484 {
485 count += *W;
486 sum += *W*x;
487 }
488 #ifndef NO_FLAG
489 else
490 flag = 1;
491 #endif
492
493 data++; // stride=1
494 W++;
495 }
496 while (data < end);
497 *No = count;
498 } else {
499 // w/o weight vector
500 size_t countI = 0;
501 do {
502 long double x = *data;
503 if (!isnan(x))
504 {
505 countI++;
506 sum += x;
507 }
508 #ifndef NO_FLAG
509 else
510 flag = 1;
511 #endif
512 data++; // stride=1
513 }
514 while (data < end);
515 *No = (double)countI;
516 }
517
518 #ifndef NO_FLAG
519 if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1;
520 #endif
521 *s = sum;
522
523 }
524
525
__sumskipnan3w__(double * data,size_t Ni,double * s,double * s2,double * No,char * flag_anyISNAN,double * W)526 inline int __sumskipnan3w__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W)
527 {
528 long double sum=0;
529 long double msq=0;
530 char flag=0;
531 // LOOP along dimension DIM
532
533 double *end = data + stride*Ni;
534 if (W) {
535 // with weight vector
536 long double count = 0.0;
537 do {
538 long double x = *data;
539 if (!isnan(x)) {
540 count += *W;
541 long double t = *W*x;
542 sum += t;
543 msq += x*t;
544 }
545 #ifndef NO_FLAG
546 else
547 flag = 1;
548 #endif
549 data++; // stride=1
550 W++;
551 }
552 while (data < end);
553 *No = count;
554 } else {
555 // w/o weight vector
556 size_t countI = 0;
557 do {
558 long double x = *data;
559 if (!isnan(x)) {
560 countI++;
561 sum += x;
562 msq += x*x;
563 }
564 #ifndef NO_FLAG
565 else
566 flag = 1;
567 #endif
568 data++; // stride=1
569 }
570 while (data < end);
571 *No = (double)countI;
572 }
573
574 #ifndef NO_FLAG
575 if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1;
576 #endif
577 *s = sum;
578 *s2 = msq;
579 }
580
__sumskipnan2wr__(double * data,size_t Ni,double * s,double * No,char * flag_anyISNAN,double * W)581 inline int __sumskipnan2wr__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W)
582 {
583 double sum=0;
584 char flag=0;
585 // LOOP along dimension DIM
586
587 double *end = data + stride*Ni;
588 if (W) {
589 // with weight vector
590 double count = 0.0;
591 do {
592 double x = *data;
593 if (!isnan(x))
594 {
595 count += *W;
596 sum += *W*x;
597 }
598 #ifndef NO_FLAG
599 else
600 flag = 1;
601 #endif
602
603 data++; // stride=1
604 W++;
605 }
606 while (data < end);
607 *No = count;
608 } else {
609 // w/o weight vector
610 size_t countI = 0;
611 do {
612 double x = *data;
613 if (!isnan(x))
614 {
615 countI++;
616 sum += x;
617 }
618 #ifndef NO_FLAG
619 else
620 flag = 1;
621 #endif
622 data++; // stride=1
623 }
624 while (data < end);
625 *No = (double)countI;
626 }
627
628 #ifndef NO_FLAG
629 if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1;
630 #endif
631 *s = sum;
632
633 }
634
635
__sumskipnan3wr__(double * data,size_t Ni,double * s,double * s2,double * No,char * flag_anyISNAN,double * W)636 inline int __sumskipnan3wr__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W)
637 {
638 double sum=0;
639 double msq=0;
640 char flag=0;
641 // LOOP along dimension DIM
642
643 double *end = data + stride*Ni;
644 if (W) {
645 // with weight vector
646 double count = 0.0;
647 do {
648 double x = *data;
649 if (!isnan(x)) {
650 count += *W;
651 double t = *W*x;
652 sum += t;
653 msq += x*t;
654 }
655 #ifndef NO_FLAG
656 else
657 flag = 1;
658 #endif
659 data++; // stride=1
660 W++;
661 }
662 while (data < end);
663 *No = count;
664 } else {
665 // w/o weight vector
666 size_t countI = 0;
667 do {
668 double x = *data;
669 if (!isnan(x)) {
670 countI++;
671 sum += x;
672 msq += x*x;
673 }
674 #ifndef NO_FLAG
675 else
676 flag = 1;
677 #endif
678 data++; // stride=1
679 }
680 while (data < end);
681 *No = (double)countI;
682 }
683
684 #ifndef NO_FLAG
685 if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1;
686 #endif
687 *s = sum;
688 *s2 = msq;
689 }
690
691
692
693 /***************************************
694 using Kahan's summation formula [1]
695 this gives more accurate results while the computational effort within the loop is about 4x as high
696 First tests show a penalty of about 40% in terms of computational time.
697
698 [1] David Goldberg,
699 What Every Computer Scientist Should Know About Floating-Point Arithmetic
700 ACM Computing Surveys, Vol 23, No 1, March 1991.
701 ****************************************/
702
__sumskipnan2we__(double * data,size_t Ni,double * s,double * No,char * flag_anyISNAN,double * W)703 inline int __sumskipnan2we__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W)
704 {
705 long double sum=0;
706 char flag=0;
707 // LOOP along dimension DIM
708
709 double *end = data + stride*Ni;
710 if (W) {
711 // with weight vector
712 long double count = 0.0;
713 long double rc=0.0, rn=0.0;
714 do {
715 long double x = *data;
716 long double t,y;
717 if (!isnan(x))
718 {
719 //count += *W; [1]
720 y = *W-rn;
721 t = count+y;
722 rn= (t-count)-y;
723 count= t;
724
725 //sum += *W*x; [1]
726 y = *W*x-rc;
727 t = sum+y;
728 rc= (t-sum)-y;
729 sum= t;
730 }
731 #ifndef NO_FLAG
732 else
733 flag = 1;
734 #endif
735
736 data++; // stride=1
737 W++;
738 }
739 while (data < end);
740 *No = count;
741 } else {
742 // w/o weight vector
743 size_t countI = 0;
744 long double rc=0.0;
745 do {
746 long double x = *data;
747 long double t,y;
748 if (!isnan(x))
749 {
750 countI++;
751 // sum += x; [1]
752 y = x-rc;
753 t = sum+y;
754 rc= (t-sum)-y;
755 sum= t;
756 }
757 #ifndef NO_FLAG
758 else
759 flag = 1;
760 #endif
761 data++; // stride=1
762 }
763 while (data < end);
764 *No = (double)countI;
765 }
766
767 #ifndef NO_FLAG
768 if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1;
769 #endif
770 *s = sum;
771
772 }
773
774
__sumskipnan3we__(double * data,size_t Ni,double * s,double * s2,double * No,char * flag_anyISNAN,double * W)775 inline int __sumskipnan3we__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W)
776 {
777 long double sum=0;
778 long double msq=0;
779 char flag=0;
780 // LOOP along dimension DIM
781
782 double *end = data + stride*Ni;
783 if (W) {
784 // with weight vector
785 long double count = 0.0;
786 long double rc=0.0, rn=0.0, rq=0.0;
787 do {
788 long double x = *data;
789 long double t,y;
790 if (!isnan(x)) {
791 //count += *W; [1]
792 y = *W-rn;
793 t = count+y;
794 rn= (t-count)-y;
795 count= t;
796
797 long double w = *W*x;
798 //sum += *W*x; [1]
799 y = *W*x-rc;
800 t = sum+y;
801 rc= (t-sum)-y;
802 sum= t;
803
804 // msq += x*w;
805 y = w*x-rq;
806 t = msq+y;
807 rq= (t-msq)-y;
808 msq= t;
809 }
810 #ifndef NO_FLAG
811 else
812 flag = 1;
813 #endif
814 data++; // stride=1
815 W++;
816 }
817 while (data < end);
818 *No = count;
819 } else {
820 // w/o weight vector
821 size_t countI = 0;
822 long double rc=0.0, rq=0.0;
823 do {
824 long double x = *data;
825 long double t,y;
826 if (!isnan(x)) {
827 countI++;
828 //sum += x; [1]
829 y = x-rc;
830 t = sum+y;
831 rc= (t-sum)-y;
832 sum= t;
833
834 // msq += x*x;
835 y = x*x-rq;
836 t = msq+y;
837 rq= (t-msq)-y;
838 msq= t;
839 }
840 #ifndef NO_FLAG
841 else
842 flag = 1;
843 #endif
844 data++; // stride=1
845 }
846 while (data < end);
847 *No = (double)countI;
848 }
849
850 #ifndef NO_FLAG
851 if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1;
852 #endif
853 *s = sum;
854 *s2 = msq;
855 }
856
__sumskipnan2wer__(double * data,size_t Ni,double * s,double * No,char * flag_anyISNAN,double * W)857 inline int __sumskipnan2wer__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W)
858 {
859 double sum=0;
860 char flag=0;
861 // LOOP along dimension DIM
862
863 double *end = data + stride*Ni;
864 if (W) {
865 // with weight vector
866 double count = 0.0;
867 double rc=0.0, rn=0.0;
868 do {
869 double x = *data;
870 double t,y;
871 if (!isnan(x))
872 {
873 //count += *W; [1]
874 y = *W-rn;
875 t = count+y;
876 rn= (t-count)-y;
877 count= t;
878
879 //sum += *W*x; [1]
880 y = *W*x-rc;
881 t = sum+y;
882 rc= (t-sum)-y;
883 sum= t;
884 }
885 #ifndef NO_FLAG
886 else
887 flag = 1;
888 #endif
889
890 data++; // stride=1
891 W++;
892 }
893 while (data < end);
894 *No = count;
895 } else {
896 // w/o weight vector
897 size_t countI = 0;
898 double rc=0.0;
899 do {
900 double x = *data;
901 double t,y;
902 if (!isnan(x))
903 {
904 countI++;
905 // sum += x; [1]
906 y = x-rc;
907 t = sum+y;
908 rc= (t-sum)-y;
909 sum= t;
910 }
911 #ifndef NO_FLAG
912 else
913 flag = 1;
914 #endif
915 data++; // stride=1
916 }
917 while (data < end);
918 *No = (double)countI;
919 }
920
921 #ifndef NO_FLAG
922 if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1;
923 #endif
924 *s = sum;
925
926 }
927
928
__sumskipnan3wer__(double * data,size_t Ni,double * s,double * s2,double * No,char * flag_anyISNAN,double * W)929 inline int __sumskipnan3wer__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W)
930 {
931 double sum=0;
932 double msq=0;
933 char flag=0;
934 // LOOP along dimension DIM
935
936 double *end = data + stride*Ni;
937 if (W) {
938 // with weight vector
939 double count = 0.0;
940 double rc=0.0, rn=0.0, rq=0.0;
941 do {
942 double x = *data;
943 double t,y;
944 if (!isnan(x)) {
945 //count += *W; [1]
946 y = *W-rn;
947 t = count+y;
948 rn= (t-count)-y;
949 count= t;
950
951 double w = *W*x;
952 //sum += *W*x; [1]
953 y = *W*x-rc;
954 t = sum+y;
955 rc= (t-sum)-y;
956 sum= t;
957
958 // msq += x*w;
959 y = w*x-rq;
960 t = msq+y;
961 rq= (t-msq)-y;
962 msq= t;
963 }
964 #ifndef NO_FLAG
965 else
966 flag = 1;
967 #endif
968 data++; // stride=1
969 W++;
970 }
971 while (data < end);
972 *No = count;
973 } else {
974 // w/o weight vector
975 size_t countI = 0;
976 double rc=0.0, rq=0.0;
977 do {
978 double x = *data;
979 double t,y;
980 if (!isnan(x)) {
981 countI++;
982 //sum += x; [1]
983 y = x-rc;
984 t = sum+y;
985 rc= (t-sum)-y;
986 sum= t;
987
988 // msq += x*x;
989 y = x*x-rq;
990 t = msq+y;
991 rq= (t-msq)-y;
992 msq= t;
993 }
994 #ifndef NO_FLAG
995 else
996 flag = 1;
997 #endif
998 data++; // stride=1
999 }
1000 while (data < end);
1001 *No = (double)countI;
1002 }
1003
1004 #ifndef NO_FLAG
1005 if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1;
1006 #endif
1007 *s = sum;
1008 *s2 = msq;
1009 }
1010
1011