1 /*************************************************************************** 2 smoothPolyDnans.hpp -- nan version of smooth accelerated n D 3 ------------------- 4 begin : May 30 2017 5 copyright : (C) 2017 by G. Duvert 6 email : m_schellens@users.sourceforge.net 7 ***************************************************************************/ 8 9 /*************************************************************************** 10 * * 11 * This program is free software; you can redistribute it and/or modify * 12 * it under the terms of the GNU General Public License as published by * 13 * the Free Software Foundation; either version 2 of the License, or * 14 * (at your option) any later version. * 15 * * 16 ***************************************************************************/ 17 18 // to be included from datatypes.cpp 19 #ifdef INCLUDE_SMOOTH_POLYD_NAN 20 21 SMOOTH_Ty* src=srcIn; 22 SMOOTH_Ty* dest=destIn; 23 SMOOTH_Ty* store; 24 25 SizeT srcDim[MAXRANK]; 26 for (int i = 0; i < rank; ++i) srcDim[i] = datainDim[i]; 27 28 SizeT nEl=1; 29 for (int i=0; i< rank; ++i) nEl*=srcDim[i]; 30 31 DUInt perm[rank]; //turntable transposition index list 32 for (int i = 0; i < rank; ++i) perm[i] = ((i + 1) % rank); //[1,2,...,0] 33 34 SizeT destStride[ MAXRANK + 1]; 35 36 SizeT destDim[ MAXRANK]; //use destDim mainly to turn resDim each time 37 for (int i = 0; i < rank; ++i) destDim[i] = srcDim[i]; 38 39 // successively apply smooth 1d and write perm-transposed values in res, then exchange res and data for next iteration. 40 // the trick is to update srcDim and resStride each time. 41 for (int r = 0; r < rank; ++r) { 42 //accelerator: compute destStride from a perm-uted srcDim: 43 destStride[0] = 1; 44 destStride[1] = srcDim[perm[0]]; 45 int m = 1; 46 for (; m < rank; ++m) destStride[m + 1] = destStride[m] * srcDim[perm[m]]; 47 for (; m < MAXRANK; ++m) destStride[m + 1] = destStride[rank]; 48 49 SizeT dimx = srcDim[0]; //which has been permutated 50 SizeT dimy = nEl / dimx; 51 SizeT w = width[r] / 2; 52 if (w == 0) {//fast transpose 53 #pragma omp parallel if (nEl >= CpuTPOOL_MIN_ELTS && (CpuTPOOL_MAX_ELTS == 0 || CpuTPOOL_MAX_ELTS <= nEl)) 54 { 55 #pragma omp for nowait 56 for (SizeT i = 0; i < nEl; ++i) dest[transposed1Index(i, srcDim, destStride, rank)] = src[i]; 57 } 58 } else { //smooth & transpose 59 #pragma omp parallel if (nEl >= CpuTPOOL_MIN_ELTS && (CpuTPOOL_MAX_ELTS == 0 || CpuTPOOL_MAX_ELTS <= nEl)) 60 { 61 #pragma omp for nowait 62 63 for (SizeT j = 0; j < dimy; ++j) { 64 //initiate mean of Width first values: 65 DDouble z; 66 DDouble n = 0; 67 DDouble mean = 0; 68 for (SizeT i = j * dimx; i < j * dimx + (2 * w + 1); ++i) { 69 DDouble v = src[i]; 70 if (isfinite(v)) { 71 n += 1.0; 72 z = 1. / n; 73 mean = (1. - z) * mean + z * v; 74 } 75 } 76 77 #if defined(USE_EDGE) 78 //start: use mean1, n1 79 DDouble mean1 = mean; 80 DDouble n1 = n; 81 for (SizeT i = 0; i < w; ++i) { 82 dest[transposed1Index(w - i + j * dimx, srcDim, destStride, rank)] = (n1 > 0) ? mean1 : src[w - i + j * dimx]; 83 DDouble v = src[2 * w - i + j * dimx]; 84 if (isfinite(v)) { 85 mean1 *= n1; 86 mean1 -= v; 87 n1 -= 1.0; 88 mean1 /= n1; 89 } 90 if (n1 <= 0) mean1 = 0; 91 92 #if defined (EDGE_WRAP) 93 v = src[dimx - i - 1 + j * dimx]; 94 if (isfinite(v)) { 95 #elif defined (EDGE_TRUNCATE) 96 v = src[0 + j * dimx]; 97 if (isfinite(v)) { 98 #elif defined (EDGE_MIRROR) 99 v = src[i + j * dimx]; 100 if (isfinite(v)) { 101 #elif defined (EDGE_ZERO) 102 v = 0; 103 { 104 #endif 105 mean1 *= n1; 106 if (n1 < 2 * w + 1) n1 += 1.0; 107 mean1 += v; 108 mean1 /= n1; 109 } 110 } 111 dest[transposed1Index(0 + j*dimx, srcDim, destStride, rank)] = (n1 > 0) ? mean1 : src[0 + j*dimx]; 112 #else //just transpose 113 for (SizeT i = j * dimx; i < j * dimx + w; ++i) dest[transposed1Index(i, srcDim, destStride, rank)] = src[i]; 114 #endif 115 116 //center 117 for (SizeT i = w + j * dimx, ibef = j * dimx, iend = 2 * w + 1 + j * dimx; i < dimx - w - 1 + j * dimx; ++i, ++ibef, ++iend) { 118 dest[transposed1Index(i, srcDim, destStride, rank)] = (n > 0) ? mean : src[i]; 119 DDouble v = src[ibef]; 120 if (isfinite(v)) { 121 mean *= n; 122 mean -= v; 123 n -= 1.0; 124 mean /= n; 125 } 126 127 if (n <= 0) mean = 0; 128 129 v = src[iend]; 130 if (isfinite(v)) { 131 mean *= n; 132 if (n < 2 * w + 1) n += 1.0; 133 mean += v; 134 mean /= n; 135 } 136 } 137 dest[transposed1Index(dimx - 1 - w + j*dimx, srcDim, destStride, rank)] = (n > 0) ? mean : src[dimx - 1 - w + j*dimx]; 138 139 #if defined(USE_EDGE) 140 //end: use mean2 & n as we go in the same direction... 141 DDouble mean2 = mean; 142 143 for (SizeT i = dimx - 1 - w; i < dimx - 1; ++i) { 144 dest[transposed1Index(i + j*dimx, srcDim, destStride, rank)] = (n > 0) ? mean2 : src[i + j*dimx]; 145 DDouble v = src[i - w + j * dimx]; 146 if (isfinite(v)) { 147 mean2 *= n; 148 mean2 -= v; 149 n -= 1.0; 150 mean2 /= n; 151 } 152 153 if (n <= 0) mean2 = 0; 154 155 #if defined (EDGE_WRAP) 156 v = src[i - dimx + w + 1 + j * dimx]; 157 if (isfinite(v)) { 158 #elif defined (EDGE_TRUNCATE) 159 v = src[dimx - 1 + j * dimx]; 160 if (isfinite(v)) { 161 #elif defined (EDGE_MIRROR) 162 v = src[2 * dimx - i - w - 2 + j * dimx]; 163 if (isfinite(v)) { 164 #elif defined (EDGE_ZERO) 165 v = 0; 166 { 167 #endif 168 mean2 *= n; 169 if (n < 2 * w + 1) n += 1.0; 170 mean2 += v; 171 mean2 /= n; 172 } 173 } 174 dest[transposed1Index(dimx - 1 + j*dimx, srcDim, destStride, rank)] = (n > 0) ? mean2 : src[dimx - 1 + j*dimx]; 175 #else //just transpose 176 for (SizeT i = (dimx - w + j * dimx); i < (dimx + j * dimx); ++i) dest[transposed1Index(i, srcDim, destStride, rank)] = src[i]; 177 #endif 178 } 179 } 180 } 181 //pseudo-dim of src is now rotated by 1 182 SizeT tempSize[MAXRANK]; 183 for (int i = 0; i < rank; ++i) tempSize[i] = srcDim[i]; 184 for (int i = 0; i < rank; ++i) srcDim[i] = tempSize[perm[i]]; 185 // exchange dest and src 186 store=src; 187 src=dest; 188 dest=store; //now 'src' is the last computed array, whatever srcIn or destIn... 189 } 190 //if rank is even, must copy back src to destIn (since only the pointers were exchanged) 191 if (rank%2 == 0) memcpy(destIn, src, nEl * sizeof (src[0])); 192 #endif 193