1 /*
2 This file is part of CDO. CDO is a collection of Operators to manipulate and analyse Climate model Data.
3
4 Author: Uwe Schulzweida
5
6 */
7
8 #include <cmath>
9
10 #ifdef HAVE_CONFIG_H
11 #include "config.h"
12 #endif
13
14 #include "cdo_options.h"
15 #include "cimdOmp.h"
16 #include "constants.h"
17 #include "varray.h"
18 #include "gaussian_latitudes.h"
19
20
21 static void
jspleg1(double * pleg,double plat,long ktrunc,double * work)22 jspleg1(double *pleg, double plat, long ktrunc, double *work)
23 {
24 /*
25 jspleg1 - Routine to calculate legendre functions
26
27 Purpose
28 --------
29
30 This routine calculates the legendre functions for one latitude.
31 (but not their derivatives)
32
33 Interface
34 ----------
35
36 jspleg1( pleg, plat, ktrunc)
37
38 Input parameters
39 ----------------
40
41 plat - Latitude in radians
42 ktrunc - Spectral truncation
43
44 Output parameters
45 -----------------
46
47 pleg - Array of legendre functions for one latitude.
48 The array must be at least (KTRUNC+1)*(KTRUNC+4)/2 words long.
49
50 Method
51 ------
52
53 Recurrence relation with explicit relations for P(m,m) and P(m,m+1)
54
55 AUTHOR
56 ------
57
58 J.D.Chambers ECMWF 9 November 1993
59
60 Modifications
61 -------------
62
63 None
64 */
65
66 // Initialization
67
68 const auto itout1 = ktrunc + 1;
69 // const auto zsin = std::sin(plat);
70 const auto zsin = plat;
71 const auto zcos = std::sqrt(1.0 - zsin * zsin);
72
73 double *zhlp1 = work;
74 double *zhlp2 = work + itout1;
75 double *zhlp3 = work + itout1 + itout1;
76
77 // Step 1. M = 0, N = 0 and N = 1
78
79 long ilm = 1;
80 pleg[0] = 1.0;
81 auto zf1m = std::sqrt(3.0);
82 pleg[1] = zf1m * zsin;
83
84 // Step 2. Sum for M = 0 to T (T = truncation)
85
86 for (long jm = 1; jm < itout1; jm++)
87 {
88 zhlp1[jm] = std::sqrt(2.0 * jm + 3.0);
89 zhlp2[jm] = 1.0 / std::sqrt(2.0 * jm);
90 }
91
92 zhlp1[0] = std::sqrt(3.0);
93
94 for (long jm = 0; jm < itout1; jm++)
95 {
96 const auto i1m = jm - 1;
97 const auto zre1 = zhlp1[jm];
98 auto ze1 = 1.0 / zre1;
99
100 // Step 3. M > 0 only
101
102 if (i1m != -1)
103 {
104 const auto zf2m = zf1m * zcos * zhlp2[jm];
105 zf1m = zf2m * zre1;
106
107 // Step 4. N = M and N = M+1
108
109 pleg[++ilm] = zf2m;
110 pleg[++ilm] = zf1m * zsin;
111
112 // When output truncation is reached, return to calling program
113
114 if (jm == (itout1 - 1)) break;
115 }
116
117 // Step 5. Sum for N = M+2 to T+1
118
119 const double zjmsqr = jm * jm;
120 const auto im2 = i1m + 2;
121
122 for (long jcn = im2; jcn < itout1; jcn++)
123 {
124 const double znsqr = (jcn + 1) * (jcn + 1);
125 zhlp3[jcn] = std::sqrt((4.0 * znsqr - 1.0) / (znsqr - zjmsqr));
126 }
127
128 for (long jcn = im2; jcn < itout1; jcn++)
129 {
130 const auto ze2 = zhlp3[jcn];
131 ilm++;
132 pleg[ilm] = ze2 * (zsin * pleg[ilm - 1] - ze1 * pleg[ilm - 2]);
133 ze1 = 1.0 / ze2;
134 }
135 }
136 }
137
138 // phcs - Compute values of Legendre polynomials and their meridional derivatives
139 static void
phcs(bool needHnm,double * pnm,double * hnm,long waves,double pmu,double * ztemp1,double * ztemp2)140 phcs(bool needHnm, double *pnm, double *hnm, long waves, double pmu, double *ztemp1, double *ztemp2)
141 {
142 long jnmjk;
143 double zcospar, zsinpar, zcosfak, zsinfak;
144 double zq, zwm2, zw, zwq, zq2m1, zwm2q2, z2q2, zcnm, zdnm, zenm;
145
146 const auto twowaves = waves << 1;
147
148 const auto zcos2 = std::sqrt(1.0 - pmu * pmu);
149 const auto lat = std::acos(pmu);
150 auto zan = 1.0;
151
152 ztemp1[0] = 0.5;
153
154 for (long jn = 1; jn < twowaves; jn++)
155 {
156 const auto zsqp = 1.0 / std::sqrt((double) (jn + jn * jn));
157 zan *= std::sqrt(1.0 - 1.0 / (4 * jn * jn));
158
159 zcospar = std::cos(lat * jn);
160 zsinpar = std::sin(lat * jn) * jn * zsqp;
161 zcosfak = 1.0;
162
163 for (long jk = 2; jk < jn; jk += 2)
164 {
165 jnmjk = jn - jk;
166 zcosfak *= (jk - 1.0) * (jn + jnmjk + 2.0) / (jk * (jn + jnmjk + 1.0));
167 zsinfak = zcosfak * zsqp * (double)jnmjk;
168 zcospar += zcosfak * std::cos(lat * jnmjk);
169 zsinpar += zsinfak * std::sin(lat * jnmjk);
170 }
171
172 // code for jk == jn
173
174 if ((jn & 1) == 0)
175 {
176 zcosfak *= (double) ((jn - 1) * (jn + 2)) / (double) (jn * (jn + 1));
177 zcospar += zcosfak * 0.5;
178 }
179 ztemp1[jn] = zan * zcospar;
180 ztemp2[jn - 1] = zan * zsinpar;
181 }
182
183 memcpy(pnm, ztemp1, waves * sizeof(double));
184 pnm += waves;
185 memcpy(pnm, ztemp2, waves * sizeof(double));
186 pnm += waves;
187
188 if (needHnm)
189 {
190 hnm[0] = 0.0;
191 for (long jn = 1; jn < waves; jn++)
192 hnm[jn] = jn * (pmu * ztemp1[jn] - std::sqrt((jn + jn + 1.0) / (jn + jn - 1.0)) * ztemp1[jn - 1]);
193
194 hnm += waves;
195
196 hnm[0] = pmu * ztemp2[0];
197
198 for (long jn = 1; jn < waves; jn++)
199 hnm[jn] = (jn + 1) * pmu * ztemp2[jn]
200 - std::sqrt(((jn + jn + 3.0) * ((jn + 1) * (jn + 1) - 1.0)) / (jn + jn + 1.0)) * ztemp2[jn - 1];
201
202 hnm += waves;
203 }
204
205 for (long jm = 2; jm < waves; jm++)
206 {
207 pnm[0] = std::sqrt(1.0 + 1.0 / (jm + jm)) * zcos2 * ztemp2[0];
208 if (needHnm) hnm[0] = jm * pmu * pnm[0];
209 #if defined(CRAY)
210 #pragma _CRI novector
211 #endif
212 #if defined(__uxp__)
213 #pragma loop scalar
214 #endif
215 for (long jn = 1; jn < (twowaves - jm); jn++)
216 {
217 zq = jm + jm + jn - 1;
218 zwm2 = zq + jn;
219 zw = zwm2 + 2.0;
220 zwq = zw * zq;
221 zq2m1 = zq * zq - 1.0;
222 zwm2q2 = zwm2 * zq2m1;
223 z2q2 = zq2m1 * 2;
224 zcnm = std::sqrt((zwq * (zq - 2.0)) / (zwm2q2 - z2q2));
225 zdnm = std::sqrt((zwq * (jn + 1.0)) / zwm2q2);
226 zenm = std::sqrt(zw * jn / ((zq + 1.0) * zwm2));
227 pnm[jn] = zcnm * ztemp1[jn] - pmu * (zdnm * ztemp1[jn + 1] - zenm * pnm[jn - 1]);
228 if (needHnm) hnm[jn] = (jm + jn) * pmu * pnm[jn] - std::sqrt(zw * jn * (zq + 1) / zwm2) * pnm[jn - 1];
229 }
230 memcpy(ztemp1, ztemp2, twowaves * sizeof(double));
231 memcpy(ztemp2, pnm, twowaves * sizeof(double));
232 pnm += waves;
233 if (needHnm) hnm += waves;
234 }
235 }
236
237 void
after_legini_full(long ntr,long nlat,double * restrict poli,double * restrict pold,double * restrict pdev,double * restrict pol2,double * restrict pol3,double * restrict coslat)238 after_legini_full(long ntr, long nlat, double *restrict poli, double *restrict pold, double *restrict pdev, double *restrict pol2,
239 double *restrict pol3, double *restrict coslat)
240 {
241 auto dimsp = (ntr + 1) * (ntr + 2);
242 auto waves = ntr + 1;
243 const auto twowaves = waves << 1;
244
245 Varray<double> gmu(nlat), gwt(nlat);
246 gaussian_latitudes(nlat, gmu.data(), gwt.data());
247
248 auto needHnm = (pdev != nullptr) || (pol2 != nullptr);
249
250 #ifdef _OPENMP
251 Varray2D<double> pnm_2(Threading::ompNumThreads);
252 Varray2D<double> hnm_2(Threading::ompNumThreads);
253 Varray2D<double> ztemp1_2(Threading::ompNumThreads);
254 Varray2D<double> ztemp2_2(Threading::ompNumThreads);
255 for (long i = 0; i < Threading::ompNumThreads; ++i) pnm_2[i].resize(dimsp);
256 if (needHnm) for (long i = 0; i < Threading::ompNumThreads; ++i) hnm_2[i].resize(dimsp);
257 for (long i = 0; i < Threading::ompNumThreads; ++i) ztemp1_2[i].resize(twowaves);
258 for (long i = 0; i < Threading::ompNumThreads; ++i) ztemp2_2[i].resize(twowaves);
259 #else
260 Varray<double> pnm(dimsp), hnm, ztemp1(twowaves), ztemp2(twowaves);
261 if (needHnm) hnm.resize(dimsp);
262 #endif
263
264 #ifdef _OPENMP
265 #pragma omp parallel for default(none) shared(needHnm, nlat, waves, dimsp, pnm_2, hnm_2, ztemp1_2, ztemp2_2, poli, pold, pdev, pol2, pol3, coslat, gmu, gwt, PlanetRadius)
266 #endif
267 for (long jgl = 0; jgl < nlat; ++jgl)
268 {
269 #ifdef _OPENMP
270 const auto ompthID = cdo_omp_get_thread_num();
271 auto &pnm = pnm_2[ompthID];
272 auto &hnm = hnm_2[ompthID];
273 auto &ztemp1 = ztemp1_2[ompthID];
274 auto &ztemp2 = ztemp2_2[ompthID];
275 #endif
276 const auto gmusq = 1.0 - gmu[jgl] * gmu[jgl];
277 coslat[jgl] = std::sqrt(gmusq);
278
279 phcs(needHnm, pnm.data(), hnm.data(), waves, gmu[jgl], ztemp1.data(), ztemp2.data());
280
281 const auto zgwt = gwt[jgl];
282 const auto zrafgmusqr = 1.0 / (PlanetRadius * gmusq);
283 const auto zradsqrtgmusqr = 1.0 / (-PlanetRadius * std::sqrt(gmusq));
284
285 auto jsp = jgl;
286 for (long jm = 0; jm < waves; ++jm)
287 for (long jn = 0; jn < (waves - jm); ++jn)
288 {
289 if (poli) poli[jsp] = pnm[jm * waves + jn] * 2.0;
290 if (pold) pold[jsp] = pnm[jm * waves + jn] * zgwt;
291 if (pdev) pdev[jsp] = hnm[jm * waves + jn] * 2.0 * zradsqrtgmusqr;
292 if (pol2) pol2[jsp] = hnm[jm * waves + jn] * zgwt * zrafgmusqr;
293 if (pol3) pol3[jsp] = pnm[jm * waves + jn] * zgwt * jm * zrafgmusqr;
294 jsp += nlat;
295 }
296 }
297 }
298 /*
299 void
300 after_legini(long ntr, long nlat, double *restrict poli, double *restrict pold, double *restrict coslat)
301 {
302 long waves = ntr + 1; // used in omp loop
303 long dimpnm = (ntr + 1) * (ntr + 4) / 2; // used in omp loop
304
305 #ifdef _OPENMP
306 Varray2D<double> pnm_2(Threading::ompNumThreads);
307 Varray2D<double> work_2(Threading::ompNumThreads);
308 for (long i = 0; i < Threading::ompNumThreads; ++i) pnm_2[i].resize(dimpnm);
309 for (long i = 0; i < Threading::ompNumThreads; ++i) work_2[i].resize(3 * waves);
310 #else
311 Varray<double> pnm(dimpnm), work(3 * waves);
312 #endif
313
314 Varray<double> gmu(nlat), gwt(nlat);
315 gaussian_latitudes(nlat, gmu.data(), gwt.data());
316 for (long jgl = 0; jgl < nlat; ++jgl) gwt[jgl] *= 0.5;
317
318 for (long jgl = 0; jgl < nlat; ++jgl) coslat[jgl] = std::sqrt(1.0 - gmu[jgl] * gmu[jgl]);
319
320 #ifdef _OPENMP
321 #pragma omp parallel for default(none) shared(nlat, waves, ntr, pnm_2, work_2, gwt, gmu, poli, pold)
322 #endif
323 for (long jgl = 0; jgl < nlat / 2; jgl++)
324 {
325 #ifdef _OPENMP
326 const auto ompthID = cdo_omp_get_thread_num();
327 auto pnm = pnm_2[ompthID].data();
328 auto work = work_2[ompthID].data();
329 #endif
330 const auto zgwt = gwt[jgl];
331
332 jspleg1(&pnm[0], gmu[jgl], ntr, &work[0]);
333
334 long latn = jgl;
335 long lats;
336 long isp = 0;
337 double is;
338 for (long jm = 0; jm < waves; ++jm)
339 {
340 for (long jn = 0; jn < waves - jm; ++jn)
341 {
342 is = (jn + 1) % 2 * 2 - 1;
343 lats = latn - jgl + nlat - jgl - 1;
344 poli[latn] = pnm[isp];
345 pold[latn] = pnm[isp] * zgwt;
346 poli[lats] = pnm[isp] * is;
347 pold[lats] = pnm[isp] * zgwt * is;
348 latn += nlat;
349 isp++;
350 }
351 isp++;
352 }
353 }
354 }
355 */
356 static inline void
sp2fc_kernel(long nlat,const double * pol,const double * sal,double * restrict far,double * restrict fai)357 sp2fc_kernel(long nlat, const double *pol, const double *sal, double *restrict far, double *restrict fai)
358 {
359 const auto sar = sal[0];
360 const auto sai = sal[1];
361 for (long lat = 0; lat < nlat; lat++)
362 {
363 far[lat] += pol[lat] * sar;
364 fai[lat] += pol[lat] * sai;
365 }
366 }
367
368 void
sp2fc(const double * sa,double * fa,const double * poli,long nlev,long nlat,long nfc,long nt)369 sp2fc(const double *sa, double *fa, const double *poli, long nlev, long nlat, long nfc, long nt)
370 {
371 long ntp1 = nt + 1;
372 long nsp2 = (nt + 1) * (nt + 2);
373
374 std::vector<long> cumindex(ntp1);
375 cumindex[0] = 0;
376 for (long jmm = 1; jmm < ntp1; jmm++) cumindex[jmm] = cumindex[jmm - 1] + (ntp1 - jmm + 1);
377
378 if (nlev >= Threading::ompNumThreads)
379 {
380 #ifdef _OPENMP
381 #pragma omp parallel for default(shared)
382 #endif
383 for (long lev = 0; lev < nlev; lev++)
384 {
385 auto sal = sa + lev * nsp2;
386 auto fal = fa + lev * nfc * nlat;
387 memset(fal, 0, nfc * nlat * sizeof(double));
388
389 for (long jmm = 0; jmm < ntp1; jmm++)
390 {
391 auto polt = poli + cumindex[jmm] * nlat;
392 auto salt = sal + cumindex[jmm] * 2;
393 auto far = fal + jmm * 2 * nlat;
394 auto fai = far + nlat;
395 for (long jfc = 0; jfc < (ntp1 - jmm); jfc++)
396 {
397 sp2fc_kernel(nlat, polt + jfc * nlat, salt + jfc * 2, far, fai);
398 }
399 }
400 }
401 }
402 else
403 {
404 for (long lev = 0; lev < nlev; lev++)
405 {
406 auto sal = sa + lev * nsp2;
407 auto fal = fa + lev * nfc * nlat;
408 memset(fal, 0, nfc * nlat * sizeof(double));
409
410 #ifdef _OPENMP
411 #pragma omp parallel for default(shared) schedule(dynamic)
412 #endif
413 for (long jmm = 0; jmm < ntp1; jmm++)
414 {
415 auto polt = poli + cumindex[jmm] * nlat;
416 auto salt = sal + cumindex[jmm] * 2;
417 auto far = fal + jmm * 2 * nlat;
418 auto fai = far + nlat;
419 for (long jfc = 0; jfc < (ntp1 - jmm); jfc++)
420 {
421 sp2fc_kernel(nlat, polt + jfc * nlat, salt + jfc * 2, far, fai);
422 }
423 }
424 }
425 }
426 }
427
428 static inline void
fc2sp_kernel(long nlat,const double * pol,const double * far,const double * fai,double * sal)429 fc2sp_kernel(long nlat, const double *pol, const double *far, const double *fai, double *sal)
430 {
431 double sar = 0.0;
432 double sai = 0.0;
433 #ifdef HAVE_OPENMP4
434 #pragma omp simd reduction(+ : sar) reduction(+ : sai)
435 #endif
436 for (long lat = 0; lat < nlat; lat++)
437 {
438 sar += pol[lat] * far[lat];
439 sai += pol[lat] * fai[lat];
440 }
441 sal[0] = sar;
442 sal[1] = sai;
443 }
444
445 void
fc2sp(const double * fa,double * sa,const double * poli,long nlev,long nlat,long nfc,long nt)446 fc2sp(const double *fa, double *sa, const double *poli, long nlev, long nlat, long nfc, long nt)
447 {
448 long ntp1 = nt + 1;
449 long nsp2 = (nt + 1) * (nt + 2);
450
451 std::vector<long> cumindex(ntp1);
452 cumindex[0] = 0;
453 for (long jmm = 1; jmm < ntp1; jmm++) cumindex[jmm] = cumindex[jmm - 1] + (ntp1 - jmm + 1);
454
455 if (nlev >= Threading::ompNumThreads)
456 {
457 #ifdef _OPENMP
458 #pragma omp parallel for default(shared)
459 #endif
460 for (long lev = 0; lev < nlev; lev++)
461 {
462 auto fal = fa + lev * nfc * nlat;
463 auto sal = sa + lev * nsp2;
464
465 for (long jmm = 0; jmm < ntp1; jmm++)
466 {
467 auto polt = poli + cumindex[jmm] * nlat;
468 auto salt = sal + cumindex[jmm] * 2;
469 auto far = fal + jmm * 2 * nlat;
470 auto fai = far + nlat;
471 for (long jfc = 0; jfc < (ntp1 - jmm); jfc++)
472 {
473 fc2sp_kernel(nlat, polt + jfc * nlat, far, fai, salt + jfc * 2);
474 }
475 }
476 }
477 }
478 else
479 {
480 for (long lev = 0; lev < nlev; lev++)
481 {
482 auto fal = fa + lev * nfc * nlat;
483 auto sal = sa + lev * nsp2;
484
485 #ifdef _OPENMP
486 #pragma omp parallel for default(shared) schedule(dynamic)
487 #endif
488 for (long jmm = 0; jmm < ntp1; jmm++)
489 {
490 auto polt = poli + cumindex[jmm] * nlat;
491 auto salt = sal + cumindex[jmm] * 2;
492 auto far = fal + jmm * 2 * nlat;
493 auto fai = far + nlat;
494 for (long jfc = 0; jfc < (ntp1 - jmm); jfc++)
495 {
496 fc2sp_kernel(nlat, polt + jfc * nlat, far, fai, salt + jfc * 2);
497 }
498 }
499 }
500 }
501 }
502
503 /* ======================================== */
504 /* Convert Spectral Array to new truncation */
505 /* ======================================== */
506
507 void
sp2sp(double * arrayIn,long truncIn,double * arrayOut,long truncOut)508 sp2sp(double *arrayIn, long truncIn, double *arrayOut, long truncOut)
509 {
510 if (truncOut <= truncIn)
511 {
512 for (long n = 0; n <= truncOut; n++)
513 {
514 for (long m = n; m <= truncOut; m++)
515 {
516 *arrayOut++ = *arrayIn++;
517 *arrayOut++ = *arrayIn++;
518 }
519 arrayIn += 2 * (truncIn - truncOut);
520 }
521 }
522 else
523 {
524 for (long n = 0; n <= truncIn; n++)
525 {
526 for (long m = n; m <= truncIn; m++)
527 {
528 *arrayOut++ = *arrayIn++;
529 *arrayOut++ = *arrayIn++;
530 }
531 for (long m = truncIn + 1; m <= truncOut; ++m)
532 {
533 *arrayOut++ = 0.0;
534 *arrayOut++ = 0.0;
535 }
536 }
537 for (long n = truncIn + 1; n <= truncOut; ++n)
538 for (long m = n; m <= truncOut; ++m)
539 {
540 *arrayOut++ = 0.0;
541 *arrayOut++ = 0.0;
542 }
543 }
544 }
545
546 /* ======================================== */
547 /* Cut spectral wave numbers */
548 /* ======================================== */
549
550 void
spcut(double * arrayIn,double * arrayOut,long trunc,const int * waves)551 spcut(double *arrayIn, double *arrayOut, long trunc, const int *waves)
552 {
553 for (long n = 0; n <= trunc; n++)
554 {
555 for (long m = n; m <= trunc; m++)
556 {
557 if (waves[m])
558 {
559 *arrayOut++ = *arrayIn++;
560 *arrayOut++ = *arrayIn++;
561 }
562 else
563 {
564 *arrayOut++ = 0.0;
565 *arrayOut++ = 0.0;
566 arrayIn++;
567 arrayIn++;
568 }
569 }
570 }
571 }
572