1!--------------------------------------------------------------------------------------------------! 2! CP2K: A general program to perform molecular dynamics simulations ! 3! Copyright (C) 2000 - 2019 CP2K developers group ! 4!--------------------------------------------------------------------------------------------------! 5 6! ************************************************************************************************** 7!> \brief 8!> Routines to efficently collocate and integrate gaussians on a grid 9!> These use most of Joost's tricks and a couple more... 10!> result is *speed* and genericity 11!> \author Fawzi Mohamed, 2007 12!> \notes original available with BSD style license 13! ************************************************************************************************** 14MODULE gauss_colloc 15 16#:include "colloc_int_kloop.fypp" 17 18 USE d3_poly, ONLY: & 19 grad_size3, poly_affine_t3, poly_affine_t3t, poly_p_eval2b, poly_p_eval3b, & 20 poly_padd_uneval2b, poly_padd_uneval3b, poly_size1, poly_size2, poly_size3 21 USE kinds, ONLY: dp,& 22 int_8 23 USE lgrid_types, ONLY: lgrid_type 24 25!$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num, omp_get_num_threads 26#include "./base/base_uses.f90" 27 28 IMPLICIT NONE 29 PRIVATE 30 31 PUBLIC :: collocGauss, & 32 integrateGaussFull 33 34#ifdef FD_DEBUG 35#define IF_CHECK(x,y) x 36#else 37#define IF_CHECK(x,y) y 38#endif 39 40 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'gauss_colloc' 41 42 REAL(dp), PARAMETER :: small = TINY(1.0_dp) 43 44 ! keep prettify happy (does not see the include) 45 INTEGER(KIND=int_8), PARAMETER :: unused_import_of_int_8 = 1 46 47CONTAINS 48 49! ************************************************************************************************** 50!> \brief collocate a periodically repeated gaussian on a non orthormbic grid 51!> 52!> this routine has been tested and works well with cells with 53!> det(h)/sqrt(tr(dot(h^T,h)))>0.2 (2 angles bigger than 24 deg or one angle 54!> bigger than 11 deg). 55!> Because of its numerics it might fail badly (infinity or NaN) with 56!> with more deformed cells. Avoiding this would be bossible only using 57!> IEEE numerics controls, which would also make everything slower and 58!> less supported. 59!> Still the actual numeric has been carefully tuned, and in normal cases 60!> and most anormal it should work. 61!> With det(h)/sqrt(tr(dot(h^T,h)))>0.2 I could not find any failure. 62!> 63!> \param h cell matrix 64!> \param h_inv inverse of the cell matrix 65!> \param grid the grid 66!> \param poly polynomial (d3_poly format) 67!> \param alphai exponential coeff 68!> \param posi position of the gaussian 69!> \param max_r2 maximum radius of collocation squared 70!> \param periodic array of 0 or 1 that says which dimensions have pbc (1=pbc) 71!> \param gdim dimension of the grid (grid might be a subset) 72!> \param local_bounds local bounds of the grid piece that is kept locally 73!> (i.e. of grid) the global grid is assumed to atart at 0,0,0 74!> \param local_shift start indexes of the local slice (i.e. of grid) 75!> \param poly_shift position of posi in the polynomial reference system. 76!> Set it to posi to use the global reference system. 77!> \param scale a global scale factor 78!> \param lgrid ... 79! ************************************************************************************************** 80 SUBROUTINE collocGauss(h, h_inv, grid, poly, alphai, posi, max_r2, & 81 periodic, gdim, local_bounds, local_shift, poly_shift, scale, lgrid) 82 REAL(dp), DIMENSION(0:2, 0:2), INTENT(in) :: h, h_inv 83 REAL(dp), DIMENSION(0:, 0:, 0:), INTENT(inout) :: grid 84 REAL(dp), DIMENSION(:), INTENT(inout) :: poly 85 REAL(dp), INTENT(in) :: alphai 86 REAL(dp), DIMENSION(0:2), INTENT(in) :: posi 87 REAL(dp), INTENT(in) :: max_r2 88 INTEGER, DIMENSION(0:2), INTENT(in) :: periodic 89 INTEGER, DIMENSION(0:2), INTENT(in), OPTIONAL :: gdim 90 INTEGER, DIMENSION(2, 0:2), INTENT(in), OPTIONAL :: local_bounds 91 INTEGER, DIMENSION(0:2), INTENT(in), OPTIONAL :: local_shift 92 REAL(dp), DIMENSION(0:2), INTENT(in), OPTIONAL :: poly_shift 93 REAL(dp), INTENT(in), OPTIONAL :: scale 94 TYPE(lgrid_type), INTENT(inout), OPTIONAL :: lgrid 95 96 CHARACTER(len=*), PARAMETER :: routineN = 'collocGauss', routineP = moduleN//':'//routineN 97 98 CALL colloc_int_body(h, h_inv, grid, poly, alphai, posi, max_r2, & 99 periodic, gdim, local_bounds, local_shift, & 100 poly_shift, scale, lgrid, integrate=.FALSE.) 101 102 END SUBROUTINE 103 104! ************************************************************************************************** 105!> \brief integrates a gaussian times any polynomial up to a give order. 106!> 107!> Most things are the same as for collocGauss (see its comments). 108!> Returns the integrals of all the monomials in d3 format into poly 109!> \param h ... 110!> \param h_inv ... 111!> \param grid ... 112!> \param poly ... 113!> \param alphai ... 114!> \param posi ... 115!> \param max_r2 ... 116!> \param periodic ... 117!> \param gdim ... 118!> \param local_bounds ... 119!> \param local_shift ... 120!> \param poly_shift ... 121!> \param scale ... 122! ************************************************************************************************** 123 SUBROUTINE integrateGaussFull(h, h_inv, grid, poly, alphai, posi, max_r2, & 124 periodic, gdim, local_bounds, local_shift, poly_shift, scale) 125 REAL(dp), DIMENSION(0:2, 0:2), INTENT(in) :: h, h_inv 126 REAL(dp), DIMENSION(0:, 0:, 0:), INTENT(inout) :: grid 127 REAL(dp), DIMENSION(:), INTENT(inout) :: poly 128 REAL(dp), INTENT(in) :: alphai 129 REAL(dp), DIMENSION(0:2), INTENT(in) :: posi 130 REAL(dp), INTENT(in) :: max_r2 131 INTEGER, DIMENSION(0:2), INTENT(in) :: periodic 132 INTEGER, DIMENSION(0:2), INTENT(in), OPTIONAL :: gdim 133 INTEGER, DIMENSION(2, 0:2), INTENT(in), OPTIONAL :: local_bounds 134 INTEGER, DIMENSION(0:2), INTENT(in), OPTIONAL :: local_shift 135 REAL(dp), DIMENSION(0:2), INTENT(in), OPTIONAL :: poly_shift 136 REAL(dp), INTENT(in), OPTIONAL :: scale 137 138 CHARACTER(len=*), PARAMETER :: routineN = 'integrateGaussFull', & 139 routineP = moduleN//':'//routineN 140 141 CALL colloc_int_body(h, h_inv, grid, poly, alphai, posi, max_r2, & 142 periodic, gdim, local_bounds, local_shift, & 143 poly_shift, scale, integrate=.TRUE.) 144 145 END SUBROUTINE 146 147! ************************************************************************************************** 148!> \brief Common code for collocGauss() and integrateGaussFull() 149!> \param h ... 150!> \param h_inv ... 151!> \param grid ... 152!> \param poly ... 153!> \param alphai ... 154!> \param posi ... 155!> \param max_r2 ... 156!> \param periodic ... 157!> \param gdim ... 158!> \param local_bounds ... 159!> \param local_shift ... 160!> \param poly_shift ... 161!> \param scale ... 162!> \param lgrid ... 163!> \param integrate ... 164! ************************************************************************************************** 165 SUBROUTINE colloc_int_body(h, h_inv, grid, poly, alphai, posi, max_r2, & 166 periodic, gdim, local_bounds, local_shift, poly_shift, scale, lgrid, integrate) 167 REAL(dp), DIMENSION(0:2, 0:2), & 168 INTENT(in) :: h, h_inv 169 REAL(dp), DIMENSION(0:, 0:, 0:), & 170 INTENT(inout) :: grid 171 REAL(dp), DIMENSION(:), INTENT(inout) :: poly 172 REAL(dp), INTENT(in) :: alphai 173 REAL(dp), DIMENSION(0:2), INTENT(in) :: posi 174 REAL(dp), INTENT(in) :: max_r2 175 INTEGER, DIMENSION(0:2), INTENT(in) :: periodic 176 INTEGER, DIMENSION(0:2), INTENT(in), & 177 OPTIONAL :: gdim 178 INTEGER, DIMENSION(2, 0:2), INTENT(in), & 179 OPTIONAL :: local_bounds 180 INTEGER, DIMENSION(0:2), INTENT(in), & 181 OPTIONAL :: local_shift 182 REAL(dp), DIMENSION(0:2), INTENT(in), & 183 OPTIONAL :: poly_shift 184 REAL(dp), INTENT(in), OPTIONAL :: scale 185 TYPE(lgrid_type), INTENT(inout), & 186 OPTIONAL :: lgrid 187 LOGICAL :: integrate 188 189 CHARACTER(len=*), PARAMETER :: routineN = 'colloc_int_body', & 190 routineP = moduleN//':'//routineN 191 192 INTEGER, DIMENSION(0:2), PARAMETER :: permut = (/2, 1, 0/) 193 INTEGER :: grad, i, i0, ii, iiShift, iiShift2, iistart, iistart2, ij, & 194 ijShift, iJump, ik, ikShift, ikShift2, ikstart, ikstart2, iend, iend2, & 195 imax, imax1, imin, imin1, istart, istart2, j, jend, jJump, jmax, jmax1, jmin, & 196 jmin1, jstart, k, kend, kend2, kgrad, kJump, kmax, kmax1, kmin, kmin1, & 197 kstart, kstart2, max_j, size_jk, size_k, size_ijk, ig, ithread, nthread 198 INTEGER, ALLOCATABLE, DIMENSION(:, :, :) :: k_bounds 199 INTEGER, DIMENSION(0:2) :: cellShift, l_shift, l_ub, & 200 ndim, period, shiftPos, ldim2 201 INTEGER, DIMENSION(2, 0:2) :: l_bounds 202 LOGICAL :: has_overlap 203 REAL(dp) :: cci0, cci1, cci2, ccj0, ccj0_i0, ccj0_i1, ccj0_i2, ccj1, & 204 ccj1_i0, ccj1_i1, ccj2, cck0, cck0_0, cck0_0_p, cck0_i, cck0_i2, & 205 cck0_ij, cck0_j, cck0_j2, cck0_j_p, cck1, cck1_0, cck1_0_p, cck1_i, & 206 cck1_j, cck2, delta_i, delta_j, delta_k, g_scale, i_coeffn_i, icoeff0, & 207 ii_coeff0, ii_coeff2, ii_coeff2_jump, ii_coeffn, ii_coeffn_jump, & 208 ij_coeff0, ij_coeff0_jump, ij_coeff1, ik_coeff0, ik_coeff0_jump, & 209 ik_coeff1, j_coeffn_i, j_coeffn_j, jcoeff0, jj_coeff0, jj_coeff2, & 210 jj_coeffn, jk_coeff0, jk_coeff1, k_coeffn_i, k_coeffn_j, k_coeffn_k, & 211 kcoeff0, kk_coeff0, kk_coeff2, kk_coeffn, m(0:2, 0:2), maxr2, p_kk, & 212 r_0 213 REAL(dp) :: res_0, res_i, res_j, res_k, scaled_h(0:2, 0:2), sqDi, sqDj, & 214 sqDk 215 REAL(dp), ALLOCATABLE, DIMENSION(:) :: poly_ijk, poly_jk, xi 216 REAL(dp), DIMENSION(0:2) :: l, normD, p_shift, resPos, & 217 resPosReal, riPos, rpos, wrPos 218 219 REAL(dp) :: det, gval 220 REAL(dp), ALLOCATABLE, DIMENSION(:) :: k_vals 221 INTEGER, PARAMETER :: npoly = 1 222 REAL(dp), ALLOCATABLE, DIMENSION(:) :: poly_k 223 REAL(dp) :: p_v 224 225#define IF_FLAT(x,y) y 226 227 ithread = 0 228!$ ithread = omp_get_thread_num() 229 230 nthread = 1 231!$ nthread = omp_get_num_threads() 232 233 IF (integrate) THEN 234 poly = 0.0_dp 235 ELSE 236 IF (ALL(poly == 0.0_dp)) RETURN 237 ENDIF 238 239 IF (PRESENT(poly_shift)) THEN 240 p_shift = poly_shift 241 ELSE 242 p_shift = 0.0_dp 243 END IF 244 245 ldim2(permut(0)) = SIZE(grid, 1) 246 ldim2(permut(1)) = SIZE(grid, 2) 247 ldim2(permut(2)) = SIZE(grid, 3) 248 249 IF (PRESENT(gdim)) THEN 250 DO i = 0, 2 251 ndim(permut(i)) = gdim(i) 252 END DO 253 ELSE 254 ndim = ldim2 255 END IF 256 g_scale = 1.0_dp 257 IF (PRESENT(scale)) g_scale = scale 258 259 IF (integrate) THEN 260 det = (h(0, 0)*(h(1, 1)*h(2, 2)-h(1, 2)*h(2, 1)) & 261 -h(1, 0)*(h(0, 1)*h(2, 2)-h(0, 2)*h(2, 1)) & 262 +h(2, 0)*(h(0, 1)*h(1, 2)-h(0, 2)*h(1, 1))) 263 g_scale = g_scale*ABS(det)/REAL(INT(ndim(0), KIND=int_8)*INT(ndim(1), KIND=int_8)*INT(ndim(2), KIND=int_8), dp) 264 ENDIF 265 266 IF (PRESENT(local_bounds)) THEN 267 DO i = 0, 2 268 l_bounds(:, permut(i)) = local_bounds(:, i) 269 END DO 270 ELSE 271 l_bounds(1, :) = 0 272 l_bounds(2, :) = ndim-1 273 END IF 274 IF (PRESENT(local_shift)) THEN 275 DO i = 0, 2 276 l_shift(permut(i)) = local_shift(i) 277 END DO 278 ELSE 279 l_shift = 0 ! l_bounds(1,:) 280 END IF 281 l_ub = l_bounds(2, :)-l_bounds(1, :)+l_shift 282 DO i = 0, 2 283 CPASSERT(l_ub(i) < ldim2(i)) 284 END DO 285 286 DO i = 0, 2 287 period(permut(i)) = periodic(i) 288 END DO 289 CPASSERT(ALL(l_bounds(2, :) < ndim .OR. period(:) == 1)) 290 CPASSERT(ALL(l_bounds(1, :) >= 0 .OR. period(:) == 1)) 291 CPASSERT(ALL(l_bounds(2, :)-l_bounds(1, :) < ndim)) 292 rPos = 0.0_dp 293 DO j = 0, 2 294 DO i = 0, 2 295 rPos(permut(i)) = rPos(permut(i))+h_inv(i, j)*posi(j) 296 END DO 297 END DO 298 cellShift = FLOOR(rPos)*period 299 wrPos = rPos-REAL(cellShift, dp) 300 riPos = wrPos*ndim 301 shiftPos = FLOOR(riPos+0.5_dp) 302 resPos = riPos-shiftPos 303 normD = 1.0_dp/REAL(ndim, dp) 304 scaled_h = 0.0_dp 305 DO j = 0, 2 306 DO i = 0, 2 307 scaled_h(i, permut(j)) = h(i, j)*normD(permut(j)) 308 END DO 309 END DO 310 resPosReal = 0.0_dp 311 DO j = 0, 2 312 DO i = 0, 2 313 resPosReal(i) = resPosReal(i)+h(i, j)*(wrPos(permut(j))-normD(permut(j))*REAL(shiftPos(permut(j)), dp)) 314 END DO 315 END DO 316 317 !maxr2=0.0_dp 318 !DO j=0,2 319 ! DO i=0,2 320 ! ! guarantee at least the nearest points (this increases the sphere, increase just the box?) 321 ! maxr2=maxr2+(scaled_h(i,j))**2 322 ! END DO 323 !END DO 324 maxr2 = max_r2 !MAX(max_r2,maxr2) 325 326 ! build up quadratic form (ellipsoid) 327 m = 0.0_dp 328 DO j = 0, 2 329 DO i = 0, 2 330 DO k = 0, 2 331 m(i, j) = m(i, j)+scaled_h(k, i)*scaled_h(k, j) 332 END DO 333 END DO 334 END DO 335 336 l = 0.0_dp 337 DO j = 0, 2 338 DO i = 0, 2 339 l(j) = l(j)-2.0*resPos(i)*m(i, j) 340 END DO 341 END DO 342 343 r_0 = 0.0_dp 344 DO i = 0, 2 345 r_0 = r_0-0.5*resPos(i)*l(i) 346 END DO 347 348 ! calc i boundaries 349 cci2 = (m(2, 2)*m(0, 0)*m(1, 1)-m(2, 2)*m(0, 1)**2-m(1, 1)*m(0, 2)**2 & 350 +2.0_dp*m(0, 2)*m(0, 1)*m(1, 2)-m(0, 0)*m(1, 2)**2)/(m(2, 2)*m(1, 1)-m(1, 2)**2) 351 cci1 = -(-m(2, 2)*l(0)*m(1, 1)+m(2, 2)*m(0, 1)*l(1)+l(2)*m(0, 2)*m(1, 1) & 352 +l(0)*m(1, 2)**2-l(2)*m(0, 1)*m(1, 2)-m(1, 2)*l(1)*m(0, 2))/(m(2, 2)*m(1, 1)-m(1, 2)**2) 353 cci0 = -((-4.0_dp*m(2, 2)*r_0*m(1, 1)+m(2, 2)*l(1)**2+l(2)**2*m(1, 1) & 354 -2.0_dp*l(1)*m(1, 2)*l(2)+4.0_dp*r_0*m(1, 2)**2) & 355 /(m(2, 2)*m(1, 1)-m(1, 2)**2))/4.0_dp-maxr2 356 delta_i = cci1*cci1-4.0_dp*cci2*cci0 357 IF (delta_i <= 0) RETURN 358 sqDi = SQRT(delta_i) 359 imin = CEILING((-cci1-sqDi)/(2.0_dp*cci2)) 360 imax = FLOOR((-cci1+sqDi)/(2.0_dp*cci2)) 361 !! box early return 362 363 IF (period(0) == 1) THEN 364 has_overlap = imax-imin+1 > ndim(0) .OR. (l_bounds(1, 0) == 0 .AND. l_bounds(2, 0) == ndim(0)-1) 365 IF (.NOT. has_overlap) THEN 366 imin1 = MODULO(imin+shiftPos(0), ndim(0)) 367 imax1 = imin1+imax-imin+1 368 IF (imin1 < l_bounds(1, 0)) THEN 369 has_overlap = imax1 >= l_bounds(1, 0) 370 ELSE 371 has_overlap = imin1 <= l_bounds(2, 0) .OR. (imax1 >= ndim(0) .AND. l_bounds(1, 0) <= imax1+ndim(0)) 372 END IF 373 IF (.NOT. has_overlap) RETURN 374 END IF 375 ELSE 376 IF (imax+shiftPos(0) < l_bounds(1, 0) .OR. imin+shiftPos(0) > l_bounds(2, 0)) RETURN 377 END IF 378 379 ! j box bounds 380 has_overlap = l_bounds(1, 1) == 0 .AND. l_bounds(2, 1) == ndim(1)-1 381 IF (.NOT. has_overlap) THEN 382 ccj2 = (m(0, 0)*m(2, 2)*m(1, 1)-m(0, 0)*m(1, 2)**2-m(0, 1)**2*m(2, 2) & 383 +2.0_dp*m(0, 1)*m(0, 2)*m(1, 2)-m(1, 1)*m(0, 2)**2) & 384 /(m(0, 0)*m(2, 2)-m(0, 2)**2) 385 ccj1 = -(-m(0, 0)*l(1)*m(2, 2)+m(0, 0)*l(2)*m(1, 2)+l(0)*m(0, 1)*m(2, 2) & 386 -m(0, 2)*l(2)*m(0, 1)-l(0)*m(0, 2)*m(1, 2)+l(1)*m(0, 2)**2) & 387 /(m(0, 0)*m(2, 2)-m(0, 2)**2) 388 ccj0 = (4.0_dp*m(0, 0)*m(2, 2)*r_0-m(0, 0)*l(2)**2-m(2, 2)*l(0)**2 & 389 +2.0_dp*m(0, 2)*l(2)*l(0)-4.0_dp*r_0*m(0, 2)**2) & 390 /(m(0, 0)*m(2, 2)-m(0, 2)**2)/4.0_dp-maxr2 391 delta_j = ccj1*ccj1-4.0_dp*ccj2*ccj0 392 IF (delta_j <= 0) RETURN 393 sqDj = SQRT(delta_j) 394 jmin = CEILING((-ccj1-sqDj)/(2.0_dp*ccj2)) 395 jmax = FLOOR((-ccj1+sqDj)/(2.0_dp*ccj2)) 396 IF (period(1) == 1) THEN 397 IF (jmax-jmin+1 < ndim(1)) THEN 398 jmin1 = MODULO(jmin+shiftPos(1), ndim(1)) 399 jmax1 = jmin1+jmax-jmin+1 400 IF (jmin1 < l_bounds(1, 1)) THEN 401 has_overlap = jmax1 >= l_bounds(1, 1) 402 ELSE 403 has_overlap = jmin1 <= l_bounds(2, 1) .OR. (jmax1 >= ndim(1) .AND. (l_bounds(1, 1) <= jmax1-ndim(1))) 404 END IF 405 IF (.NOT. has_overlap) RETURN 406 END IF 407 ELSE 408 IF (jmax+shiftPos(1) < l_bounds(1, 1) .OR. jmin+shiftPos(1) > l_bounds(2, 1)) RETURN 409 END IF 410 END IF 411 412 ! k box bounds 413 has_overlap = l_bounds(1, 2) == 0 .AND. l_bounds(2, 2) == ndim(2)-1 414 IF (.NOT. has_overlap) THEN 415 cck2 = (m(0, 0)*m(2, 2)*m(1, 1)-m(0, 0)*m(1, 2)**2-m(0, 1)**2*m(2, 2) & 416 +2.0_dp*m(0, 1)*m(0, 2)*m(1, 2)-m(1, 1)*m(0, 2)**2)/(m(0, 0)*m(1, 1)-m(0, 1)**2) 417 cck1 = (m(0, 0)*l(2)*m(1, 1)-m(0, 0)*m(1, 2)*l(1)-m(0, 2)*l(0)*m(1, 1) & 418 +l(0)*m(0, 1)*m(1, 2)-l(2)*m(0, 1)**2+m(0, 1)*l(1)*m(0, 2))/(m(0, 0)*m(1, 1)-m(0, 1)**2) 419 cck0 = (4.0_dp*m(0, 0)*m(1, 1)*r_0-m(0, 0)*l(1)**2-m(1, 1)*l(0)**2 & 420 +2.0_dp*m(0, 1)*l(1)*l(0)-4.0_dp*r_0*m(0, 1)**2) & 421 /(m(0, 0)*m(1, 1)-m(0, 1)**2)/4.0_dp-maxr2 422 delta_k = cck1*cck1-4.0_dp*cck2*cck0 423 IF (delta_k <= 0) RETURN 424 sqDk = SQRT(delta_k) 425 kmin = CEILING((-cck1-sqDk)/(2.0_dp*cck2)) 426 kmax = FLOOR((-cck1+sqDk)/(2.0_dp*cck2)) 427 428 IF (period(2) == 1) THEN 429 IF (kmax-kmin+1 < ndim(2)) THEN 430 kmin1 = MODULO(kmin+shiftPos(2), ndim(2)) 431 kmax1 = kmin1+kmax-kmin+1 432 IF (kmin1 < l_bounds(1, 2)) THEN 433 has_overlap = kmax1 >= l_bounds(1, 2) 434 ELSE 435 has_overlap = kmin1 <= l_bounds(2, 2) .OR. & 436 (kmax1 >= ndim(2) .AND. (l_bounds(1, 2) <= MODULO(kmax1, ndim(2)))) 437 END IF 438 IF (.NOT. has_overlap) RETURN 439 END IF 440 ELSE 441 IF (kmax+shiftPos(2) < l_bounds(1, 2) .OR. kmin+shiftPos(2) > l_bounds(2, 2)) RETURN 442 END IF 443 END IF 444 445 ! k bounds (cache a la cube_info, or inversely integrate in the collocate loop?) 446 ccj2 = (m(2, 2)*m(1, 1)-m(1, 2)**2)/m(2, 2) 447 ccj1_i1 = (2*m(2, 2)*m(0, 1)-2*m(0, 2)*m(1, 2))/m(2, 2) 448 ccj1_i0 = (-l(2)*m(1, 2)+m(2, 2)*l(1))/m(2, 2) 449 ccj0_i2 = (m(2, 2)*m(0, 0)-m(0, 2)**2)/m(2, 2) 450 ccj0_i1 = (m(2, 2)*l(0)-m(0, 2)*l(2))/m(2, 2) 451 ccj0_i0 = (m(2, 2)*r_0-0.25*l(2)**2)/m(2, 2)-maxr2 452 cck2 = m(2, 2) 453 cck1_i = 2*m(0, 2) 454 cck1_j = 2*m(1, 2) 455 cck1_0 = l(2) 456 cck0_i2 = m(0, 0) 457 cck0_ij = 2*m(0, 1) 458 cck0_i = l(0) 459 cck0_j2 = m(1, 1) 460 cck0_j = l(1) 461 cck0_0 = r_0-maxr2 462 463 ! find maximum number of j 464 max_j = 0 465 DO i0 = 0, 1 466 i = (imin+imax)/2+i0 467 ccj1 = ccj1_i1*i+ccj1_i0 468 ccj0 = (ccj0_i2*i+ccj0_i1)*i+ccj0_i0 469 delta_j = ccj1*ccj1-4*ccj2*ccj0 470 IF (delta_j >= 0) THEN 471 sqDj = SQRT(delta_j) 472 max_j = MAX(max_j, FLOOR((-ccj1+sqDj)/(2.0_dp*ccj2)) & 473 -CEILING((-ccj1-sqDj)/(2.0_dp*ccj2))+1) 474 END IF 475 END DO 476 max_j = max_j+1 ! just to be sure... 477 IF (period(1) == 0) max_j = MIN(max_j, l_bounds(2, 1)-l_bounds(1, 1)+1) 478 479 IF (period(0) == 0) THEN 480 imin = MAX(l_bounds(1, 0)-shiftPos(0), imin) 481 imax = MIN(l_bounds(2, 0)-shiftPos(0), imax) 482 END IF 483 484 ! k bounds (cache a la cube_info?) 485 has_overlap = .FALSE. 486 ALLOCATE (k_bounds(0:1, 0:max_j-1, 0:MAX(0, imax-imin+1))) 487 ! k_bounds=0 488 istart = imin 489 iiShift = shiftPos(0)-l_bounds(2, 0)+istart 490 IF (iiShift > 0) iiShift = iiShift+ndim(0)-1 491 iiShift = (iiShift/ndim(0))*ndim(0)-shiftPos(0) 492 !iiShift=CEILING(REAL(shiftPos(0)+istart-l_bounds(2,0))/REAL(ndim(0)))*ndim(0)-shiftPos(0)) 493 istart = MAX(iiShift+l_bounds(1, 0), istart) 494 iend = MIN(iiShift+l_bounds(2, 0), imax) 495 iJump = ndim(0)-l_bounds(2, 0)+l_bounds(1, 0)-1 496 jJump = ndim(1)-l_bounds(2, 1)+l_bounds(1, 1)-1 497 DO 498 DO i = istart, iend 499 ccj1 = ccj1_i1*i+ccj1_i0 500 ccj0 = (ccj0_i2*i+ccj0_i1)*i+ccj0_i0 501 delta_j = ccj1*ccj1-4*ccj2*ccj0 502 IF (delta_j < 0) CONTINUE 503 sqDj = SQRT(delta_j) 504 jmin = CEILING((-ccj1-sqDj)/(2.0_dp*ccj2)) 505 jmax = FLOOR((-ccj1+sqDj)/(2.0_dp*ccj2)) 506 cck0_0_p = cck0_0+(cck0_i2*i+cck0_i)*i 507 cck0_j_p = cck0_j+cck0_ij*i 508 cck1_0_p = cck1_0+cck1_i*i 509 IF (period(1) == 0) THEN 510 jmin = MAX(l_bounds(1, 1)-shiftPos(1), jmin) 511 jmax = MIN(l_bounds(2, 1)-shiftPos(1), jmax) 512 END IF 513 jstart = jmin 514 ijShift = shiftPos(1)+jstart-l_bounds(2, 1) 515 IF (ijShift > 0) ijShift = ijShift+ndim(1)-1 516 ijShift = (ijShift/ndim(1))*ndim(1)-shiftPos(1) 517 ! ijShift=CEILING(REAL(shiftPos(1)+jstart-l_bounds(2,1))/REAL(ndim(1)))*ndim(1)-shiftPos(1) 518 jstart = MAX(ijShift+l_bounds(1, 1), jstart) 519 jend = MIN(ijShift+l_bounds(2, 1), jmax) 520 DO 521 DO j = jstart, jend 522 cck1 = cck1_0_p+cck1_j*j 523 cck0 = cck0_0_p+(cck0_j_p+cck0_j2*j)*j 524 525 delta_k = cck1*cck1-4*cck2*cck0 526 IF (delta_k < 0) THEN 527 k_bounds(0, j-jmin, i-imin) = 0 ! CEILING((-cck1)/(2.0_dp*cck2)) 528 k_bounds(1, j-jmin, i-imin) = -1 ! k_bounds(0,j-jmin,i-imin)-1 529 ELSE 530 sqDk = SQRT(delta_k) 531 kmin = CEILING((-cck1-sqDk)/(2.0_dp*cck2)) 532 kmax = FLOOR((-cck1+sqDk)/(2.0_dp*cck2)) 533 534 ! ! reduce kmax,kmin 535 ! ! this should be done later if k_bounds are shared by threads with different slices 536 ! ikShift=FLOOR(REAL(shiftPos(2)+kmax-l_bounds(1,2))/REAL(ndim(2)))*ndim(2)-shiftPos(2) 537 ! kmax=MIN(kmax,ikShift+l_bounds(2,2)) 538 ! ikShift2=CEILING(REAL(shiftPos(2)-l_bounds(2,2)+kmin)/REAL(ndim(2)))*ndim(2)-shiftPos(2) 539 ! kmin=MAX(kmin,ikShift2+l_bounds(1,2)) 540 541 k_bounds(0, j-jmin, i-imin) = kmin 542 k_bounds(1, j-jmin, i-imin) = kmax 543 IF (kmax >= kmin) has_overlap = .TRUE. 544 END IF 545 END DO 546 jstart = jend+jJump+1 547 IF (jstart > jmax) EXIT 548 jend = MIN(jend+ndim(1), jmax) 549 END DO 550 END DO 551 istart = iend+iJump+1 552 IF (istart > imax) EXIT 553 iend = MIN(iend+ndim(0), imax) 554 END DO 555 IF (period(2) == 0) THEN 556 k_bounds(0, :, :) = MAX(l_bounds(1, 2)-shiftPos(2), k_bounds(0, :, :)) 557 k_bounds(1, :, :) = MIN(l_bounds(2, 2)-shiftPos(2), k_bounds(1, :, :)) 558 END IF 559 560 IF (.NOT. has_overlap) RETURN 561 562 ! poly x,y,z -> i,j,k 563 grad = grad_size3(SIZE(poly)/npoly) 564 size_jk = poly_size2(grad)*npoly 565 size_k = poly_size1(grad)*npoly 566 size_ijk = poly_size3(grad)*npoly 567 ALLOCATE (poly_ijk(size_ijk), & 568 poly_jk(size_jk), & 569 xi(grad+1)) 570 571 IF (integrate) THEN 572 ALLOCATE (k_vals(0:grad)) 573 ELSE 574 ALLOCATE (poly_k(0:size_k-1)) 575 ENDIF 576 577 IF (integrate) THEN 578 CPASSERT(SIZE(poly) == poly_size3(grad)) 579 poly_ijk = 0.0_dp 580 ELSE 581 CALL poly_affine_t3(poly, scaled_h, -resPosReal+p_shift, poly_ijk, & 582 npoly=npoly) 583 ENDIF 584 585 ij_coeff0 = EXP(-2.0_dp*alphai*m(0, 1)) 586 ik_coeff0 = EXP(-2.0_dp*alphai*m(0, 2)) 587 ii_coeff0 = EXP(-alphai*m(0, 0)) 588 jk_coeff0 = EXP(-2.0_dp*alphai*m(1, 2)) 589 jj_coeff0 = EXP(-alphai*m(1, 1)) 590 kk_coeff0 = EXP(-alphai*m(2, 2)) 591 jk_coeff1 = 1.0_dp/jk_coeff0 592 ij_coeff1 = 1.0_dp/ij_coeff0 593 ik_coeff1 = 1.0_dp/ik_coeff0 594 ii_coeff2 = ii_coeff0*ii_coeff0 595 jj_coeff2 = jj_coeff0*jj_coeff0 596 kk_coeff2 = kk_coeff0*kk_coeff0 597 icoeff0 = EXP(-alphai*l(0)) 598 jcoeff0 = EXP(-alphai*l(1)) 599 kcoeff0 = EXP(-alphai*l(2)) 600 res_0 = EXP(-alphai*r_0)*g_scale 601 602 i_coeffn_i = icoeff0 603 j_coeffn_i = jcoeff0 604 k_coeffn_i = kcoeff0 605 ii_coeffn = i_coeffn_i*ii_coeff0 606 res_i = res_0 607 608 iJump = ndim(0)-l_bounds(2, 0)+l_bounds(1, 0)-1 609 istart = MAX(0, imin) 610 iiShift = shiftPos(0)-l_bounds(2, 0)+istart 611 IF (iiShift > 0) iiShift = iiShift+ndim(0)-1 612 iiShift = (iiShift/ndim(0))*ndim(0)-shiftPos(0) 613 !iiShift=CEILING(REAL(shiftPos(0)+istart-l_bounds(2,0))/REAL(ndim(0)))*ndim(0)-shiftPos(0) 614 istart = MAX(iiShift+l_bounds(1, 0), istart) 615 iistart = istart-iiShift-l_bounds(1, 0)+l_shift(0) 616 istart2 = MIN(-1, imax) 617 iiShift2 = shiftPos(0)+istart2-l_bounds(1, 0) 618 IF (iiShift2 < 0) iiShift2 = iiShift2-ndim(0)+1 619 iiShift2 = (iiShift2/ndim(0))*ndim(0)-shiftPos(0) 620 !iiShift2=FLOOR(REAL(shiftPos(0)+istart2-l_bounds(1,0))/REAL(ndim(0)))*ndim(0)-shiftPos(0) 621 istart2 = MIN(iiShift2+l_bounds(2, 0), istart2) 622 iistart2 = istart2-iiShift2-l_bounds(1, 0)+l_shift(0) 623 624 IF (iJump /= 0 .AND. (iistart+imax-istart >= ndim(0)+l_shift(0) .OR. & 625 iistart2+imin-istart2 <= l_ub(0)-ndim(0))) THEN 626 ! will wrap 627 ij_coeff0_jump = ij_coeff0**(iJump) 628 ik_coeff0_jump = ik_coeff0**(iJump) 629 ii_coeff2_jump = ii_coeff2**(iJump) 630 ii_coeffn_jump = ii_coeff0**((iJump)*(iJump-1)) 631 ELSE 632 ij_coeff0_jump = 1.0_dp 633 ik_coeff0_jump = 1.0_dp 634 ii_coeff2_jump = 1.0_dp 635 ii_coeffn_jump = 1.0_dp 636 END IF 637 638 iend = MIN(iiShift+l_bounds(2, 0), imax) 639 ii = iistart IF_FLAT(*iidim,) 640 IF (i > 0) THEN 641 ii_coeffn = i_coeffn_i*ii_coeff0**(2*istart+1) 642 j_coeffn_i = jcoeff0*ij_coeff0**istart 643 k_coeffn_i = kcoeff0*ik_coeff0**istart 644 res_i = res_0*(ii_coeff0**istart*i_coeffn_i)**istart 645 END IF 646 DO 647 DO i = istart, iend 648 ! perform j loop 649 IF (ABS(res_i) > small) THEN 650 CALL j_loop 651 END IF 652 j_coeffn_i = j_coeffn_i*ij_coeff0 653 k_coeffn_i = k_coeffn_i*ik_coeff0 654 res_i = res_i*ii_coeffn 655 ii_coeffn = ii_coeffn*ii_coeff2 656 ii = ii+IF_FLAT(iidim, 1) 657 END DO 658 istart = iend+iJump+1 659 IF (istart > imax) EXIT 660 iend = MIN(iend+ndim(0), imax) 661 ii = l_shift(0) IF_FLAT(*iidim,) 662 j_coeffn_i = j_coeffn_i*ij_coeff0_jump 663 k_coeffn_i = k_coeffn_i*ik_coeff0_jump 664 res_i = res_i*ii_coeffn**(iJump)*ii_coeffn_jump 665 ii_coeffn = ii_coeffn*ii_coeff2_jump 666 END DO 667 668 ! neg i side 669 i_coeffn_i = 1.0_dp/icoeff0 670 j_coeffn_i = jcoeff0 671 k_coeffn_i = kcoeff0 672 res_i = res_0 673 ii_coeffn = i_coeffn_i*ii_coeff0 674 675 iend2 = MAX(iiShift2+l_bounds(1, 0), imin) 676 ii = iistart2 IF_FLAT(*iidim,) 677 IF (istart2 < -1) THEN 678 ii_coeffn = i_coeffn_i*ii_coeff0**(-(2*istart2+1)) 679 j_coeffn_i = jcoeff0*ij_coeff0**(istart2+1) 680 k_coeffn_i = kcoeff0*ik_coeff0**(istart2+1) 681 res_i = res_0*(ii_coeff0**(-istart2-1)*i_coeffn_i)**(-istart2-1) 682 END IF 683 DO 684 DO i = istart2, iend2, -1 685 j_coeffn_i = j_coeffn_i*ij_coeff1 686 k_coeffn_i = k_coeffn_i*ik_coeff1 687 res_i = res_i*ii_coeffn 688 ii_coeffn = ii_coeffn*ii_coeff2 689 690 ! perform j loop 691 IF (ABS(res_i) > small) THEN 692 CALL j_loop 693 END IF 694 ii = ii-IF_FLAT(iidim, 1) 695 END DO 696 istart2 = iend2-iJump-1 697 IF (istart2 < imin) EXIT 698 iend2 = MAX(iend2-ndim(0), imin) 699 ii = l_ub(0) IF_FLAT(*iidim,) 700 j_coeffn_i = j_coeffn_i/ij_coeff0_jump 701 k_coeffn_i = k_coeffn_i/ik_coeff0_jump 702 res_i = res_i*ii_coeffn**iJump*ii_coeffn_jump 703 ii_coeffn = ii_coeffn*ii_coeff2_jump 704 END DO 705 706 ! the final cleanup 707 IF (integrate) THEN 708 CALL poly_affine_t3t(poly_ijk, scaled_h, -resPosReal+p_shift, poly, & 709 npoly=npoly) 710 END IF 711 712 ! rely on compiler to deallocate ALLOCATABLEs 713 714 CONTAINS 715 716! ************************************************************************************************** 717!> \brief ... 718! ************************************************************************************************** 719 SUBROUTINE j_loop() 720 ! calculate j bounds 721 ccj1 = ccj1_i1*i+ccj1_i0 722 ccj0 = (ccj0_i2*i+ccj0_i1)*i+ccj0_i0 723 delta_j = ccj1*ccj1-4*ccj2*ccj0 724 IF (delta_j < 0) THEN 725 RETURN 726 END IF 727 sqDj = SQRT(delta_j) 728 jmin = CEILING((-ccj1-sqDj)/(2.0_dp*ccj2)) 729 jmax = FLOOR((-ccj1+sqDj)/(2.0_dp*ccj2)) 730 731 IF (integrate) THEN 732 poly_jk = 0.0_dp 733 ELSE 734 CALL poly_p_eval3b(IF_CHECK(poly_ijk, poly_ijk(1)), size_ijk, REAL(i, dp), & 735 IF_CHECK(poly_jk, poly_jk(1)), size_jk, & 736 npoly=npoly, grad=grad, xi=IF_CHECK(xi, xi(1))) 737 ENDIF 738 739 IF (period(1) == 0) THEN 740 jmin = MAX(l_bounds(1, 1)-shiftPos(1), jmin) 741 jmax = MIN(l_bounds(2, 1)-shiftPos(1), jmax) 742 END IF 743 744 ! pos j side 745 j_coeffn_j = j_coeffn_i 746 k_coeffn_j = k_coeffn_i 747 jj_coeffn = j_coeffn_j*jj_coeff0 748 res_j = res_i 749 750 jJump = ndim(1)-l_bounds(2, 1)+l_bounds(1, 1) 751 jstart = MAX(0, jmin) 752 ijShift = shiftPos(1)+jstart-l_bounds(2, 1) 753 IF (ijShift > 0) ijShift = ijShift+ndim(1)-1 754 ijShift = (ijShift/ndim(1))*ndim(1)-shiftPos(1) 755 !ijShift=CEILING(REAL(shiftPos(1)+jstart-l_bounds(2,1))/REAL(ndim(1)))*ndim(1)-shiftPos(1) 756 jstart = MAX(ijShift+l_bounds(1, 1), jstart) 757 jend = MIN(ijShift+l_bounds(2, 1), jmax) 758 ij = (jstart-ijShift-l_bounds(1, 1)+l_shift(1)) IF_FLAT(*ijdim,) 759 760 IF (jstart > 0) THEN 761 k_coeffn_j = k_coeffn_i*jk_coeff0**jstart 762 jj_coeffn = j_coeffn_j*jj_coeff0**(2*jstart+1) 763 res_j = res_i*(jj_coeff0**jstart*j_coeffn_j)**jstart 764 END IF 765 DO 766 DO j = jstart, jend 767 kmin = k_bounds(0, j-jmin, i-imin) 768 kmax = k_bounds(1, j-jmin, i-imin) 769 ! do k loop 770 IF (res_j /= 0.0_dp .AND. k_coeffn_j /= 0.0_dp .AND. kmin <= kmax & 771 .AND. ABS(res_j) > small) THEN 772 CALL k_loop 773 END IF 774 k_coeffn_j = k_coeffn_j*jk_coeff0 775 res_j = res_j*jj_coeffn 776 jj_coeffn = jj_coeffn*jj_coeff2 777 ij = ij+IF_FLAT(ijdim, 1) 778 END DO 779 jstart = jend+jJump 780 IF (jstart > jmax) EXIT 781 ij = l_shift(1) IF_FLAT(*ijdim,) 782 jend = MIN(jend+ndim(1), jmax) 783 IF (jJump /= 1) THEN ! remove if? 784 k_coeffn_j = k_coeffn_i*jk_coeff0**jstart 785 jj_coeffn = j_coeffn_j*jj_coeff0**(2*jstart+1) 786 res_j = res_i*(jj_coeff0**jstart*j_coeffn_j)**jstart 787 END IF 788 END DO 789 790 ! neg j side 791 j_coeffn_j = 1.0_dp/j_coeffn_i 792 k_coeffn_j = k_coeffn_i 793 jj_coeffn = j_coeffn_j*jj_coeff0 794 res_j = res_i 795 796 jstart = MIN(-1, jmax) 797 ijShift = shiftPos(1)+jstart-l_bounds(1, 1) 798 IF (ijShift < 0) ijShift = ijShift-ndim(1)+1 799 ijShift = (ijShift/ndim(1))*ndim(1)-shiftPos(1) 800 !ijShift=FLOOR(REAL(shiftPos(1)+jstart-l_bounds(1,1))/REAL(ndim(1)))*ndim(1)-shiftPos(1)) 801 jstart = MIN(ijShift+l_bounds(2, 1), jstart) 802 jend = MAX(ijShift+l_bounds(1, 1), jmin) 803 ij = (jstart-ijShift-l_bounds(1, 1)+l_shift(1)) IF_FLAT(*ijdim,) 804 IF (jstart < -1) THEN 805 k_coeffn_j = k_coeffn_i*jk_coeff0**(jstart+1) 806 jj_coeffn = j_coeffn_j*jj_coeff0**(-(2*jstart+1)) 807 res_j = res_i*(jj_coeff0**(-jstart-1)*j_coeffn_j)**(-jstart-1) 808 END IF 809 DO 810 DO j = jstart, jend, -1 811 k_coeffn_j = k_coeffn_j*jk_coeff1 812 res_j = res_j*jj_coeffn 813 jj_coeffn = jj_coeffn*jj_coeff2 814 815 kmin = k_bounds(0, j-jmin, i-imin) 816 kmax = k_bounds(1, j-jmin, i-imin) 817 ! perform k loop 818 IF (res_j /= 0.0_dp .AND. k_coeffn_j /= 0.0_dp .AND. kmin <= kmax & 819 .AND. ABS(res_j) > small) THEN 820 CALL k_loop 821 END IF 822 ij = ij-IF_FLAT(ijdim, 1) 823 END DO 824 jstart = jend-jJump 825 IF (jstart < jmin) EXIT 826 jend = MAX(jend-ndim(1), jmin) 827 ij = l_ub(1) IF_FLAT(*ijdim,) 828 IF (jJump /= 1) THEN ! remove if? 829 k_coeffn_j = k_coeffn_i*jk_coeff0**(jstart+1) 830 jj_coeffn = j_coeffn_j*jj_coeff0**(-(2*jstart+1)) 831 res_j = res_i*(jj_coeff0**(-jstart-1)*j_coeffn_j)**(-jstart-1) 832 END IF 833 END DO 834 835 IF (integrate) THEN 836 CALL poly_padd_uneval3b(IF_CHECK(poly_ijk, poly_ijk(1)), size_ijk, REAL(i, dp), & 837 IF_CHECK(poly_jk, poly_jk(1)), size_jk, & 838 npoly=npoly, grad=grad, xi=IF_CHECK(xi, xi(1))) 839 !CALL poly_padd_uneval3(poly_ijk,REAL(i,dp),poly_jk,npoly=npoly) 840 ENDIF 841 END SUBROUTINE 842 843! ************************************************************************************************** 844!> \brief ... 845! ************************************************************************************************** 846 SUBROUTINE k_loop() 847 IF (integrate) THEN 848 SELECT CASE (grad) 849 CASE (1) 850 CALL kloop1_int() 851 CASE (2) 852 CALL kloop2_int() 853 CASE (3) 854 CALL kloop3_int() 855 CASE (4) 856 CALL kloop4_int() 857 CASE (5) 858 CALL kloop5_int() 859 CASE (6) 860 CALL kloop6_int() 861 CASE (7) 862 CALL kloop7_int() 863 CASE (8) 864 CALL kloop8_int() 865 CASE default 866 CALL kloopdefault_int(grad) 867 END SELECT 868 ELSE 869 SELECT CASE (grad) 870 CASE (1) 871 CALL kloop1_col() 872 CASE (2) 873 CALL kloop2_col() 874 CASE (3) 875 CALL kloop3_col() 876 CASE (4) 877 CALL kloop4_col() 878 CASE (5) 879 CALL kloop5_col() 880 CASE (6) 881 CALL kloop6_col() 882 CASE (7) 883 CALL kloop7_col() 884 CASE (8) 885 CALL kloop8_col() 886 CASE default 887 CALL kloopdefault_col(grad) 888 END SELECT 889 ENDIF 890 END SUBROUTINE 891 892$:colloc_int_kloop() 893$:colloc_int_kloop(FMG_INTEGRATE_FULL=True) 894 895#:for i in range(1, 9) 896$:colloc_int_kloop(grad_val=i) 897$:colloc_int_kloop(grad_val=i, FMG_INTEGRATE_FULL=True) 898#:endfor 899 900#undef IF_FLAT 901 END SUBROUTINE colloc_int_body 902 903END MODULE gauss_colloc 904