1! ====================================================================================================== 2! This kernel represents a distillation of *part* of 3! the taumol04 calculation in the gas optics part of the PSRAD 4! atmospheric 5! radiation code. 6! 7! It is meant to show conceptually how one might "SIMD-ize" swaths of 8! the taumol04 code related to calculating the 9! taug term, so that the impact of the conditional expression on 10! specparm could be reduced and at least partial vectorization 11! across columns could be achieved. 12! 13! I consider it at this point to be "compiling pseudo-code". 14! 15! By this I mean that the code as written compiles under ifort, but has 16! not been tested 17! for correctness, nor I have written a driver routine for it. It does 18! not contain everything 19! that is going on in the taug parent taumol04 code, but I don't claim 20! to actually completely 21! understand the physical meaning of all or even most of the inputs 22! required to make it run. 23! 24! It has been written to vectorize, but apparently does not actually do 25! that 26! under the ifort V13 compiler with the -xHost -O3 level of 27! optimization, even with !dir$ assume_aligned directives. 28! I hypothesize that the compiler is baulking to do so for the indirect 29! addressed calls into the absa 30! look-up table, either that or 4 byte integers may be causing alignment 31! issues relative to 8 byte reals. Anyway, 32! it seems to complain about the key loop being too complex. 33! ====================================================================================================== 34MODULE mo_taumol04 35 USE mo_kind, only:wp 36 USE mo_lrtm_setup, ONLY: nspa 37 USE mo_lrtm_setup, ONLY: nspb 38 USE mo_lrtm_setup, ONLY: ngc 39 USE rrlw_planck, ONLY: chi_mls 40 USE rrlw_kg04, ONLY: selfref 41 USE rrlw_kg04, ONLY: forref 42 USE rrlw_kg04, ONLY: absa 43 USE rrlw_kg04, ONLY: fracrefa 44 USE rrlw_kg04, ONLY: absb 45 USE rrlw_kg04, ONLY: fracrefb 46 IMPLICIT NONE 47 PRIVATE 48 PUBLIC taumol04_lwr,taumol04_upr 49 CONTAINS 50 SUBROUTINE taumol04_lwr(ncol, laytrop, nlayers, & 51 rat_h2oco2, colco2, colh2o, coldry, & 52 fac0, fac1, minorfrac, & 53 selffac,selffrac,forfac,forfrac, & 54 jp, jt, ig, indself, & 55 indfor, & 56 taug, fracs) 57 IMPLICIT NONE 58 59 real(kind=wp), PARAMETER :: oneminus = 1.0_wp - 1.0e-06_wp 60 integer, intent(in) :: ncol ! number of simd columns 61 integer, intent(in) :: laytrop ! number of layers forwer atmosphere kernel 62 integer, intent(in) :: nlayers ! total number of layers 63 real(kind=wp), intent(in), dimension(ncol,0:1,nlayers) :: rat_h2oco2,fac0,fac1 ! not sure of ncol depend 64 real(kind=wp), intent(in), dimension(ncol,nlayers) :: colco2,colh2o,coldry ! these appear to be gas concentrations 65 66 real(kind=wp), intent(in), dimension(ncol,nlayers) :: selffac,selffrac ! not sure of ncol depend 67 real(kind=wp), intent(in), dimension(ncol,nlayers) :: forfac,forfrac ! not sure of ncol depend 68 real(kind=wp), intent(in), dimension(ncol,nlayers) :: minorfrac ! not sure of ncol depend 69 70 ! Look up tables and related lookup indices 71 ! I assume all lookup indices depend on 3D position 72 ! ================================================= 73 74 integer, intent(in) :: jp(ncol,nlayers) ! I assume jp depends on ncol 75 integer, intent(in) :: jt(ncol,0:1,nlayers) ! likewise for jt 76 integer, intent(in) :: ig ! ig indexes into lookup tables 77 integer, intent(in) :: indself(ncol,nlayers) ! self index array 78 integer, intent(in) :: indfor(ncol,nlayers) ! for index array 79 real(kind=wp), intent(out), dimension(ncol,nlayers) :: taug ! kernel result 80 real(kind=wp), intent(out), dimension(ncol,nlayers) :: fracs ! kernel result 81 82 ! Local variable 83 ! ============== 84 85 integer :: lay ! layer index 86 integer :: i ! specparm types index 87 integer :: icol ! column index 88 89 ! vector temporaries 90 ! ==================== 91 92 integer, dimension(1:3,1:3) :: caseTypeOperations 93 integer, dimension(ncol) :: caseType 94 real(kind=wp), dimension(ncol) :: p, p4, fs 95 real(kind=wp), dimension(ncol) :: fpl 96 real(kind=wp), dimension(ncol) :: specmult, speccomb, specparm 97 real(kind=wp), dimension(ncol) :: specmult_planck, speccomb_planck,specparm_planck 98 real(kind=wp), dimension(ncol,0:1) :: tau_major 99 real(kind=wp), dimension(ncol) :: taufor,tauself 100 integer, dimension(ncol) :: js, ind0, ind00, ind01, ind02, jpl 101 102 ! Register temporaries 103 ! ==================== 104 105 real(kind=wp) :: p2,fk0,fk1,fk2 106 real(kind=wp) :: refrat_planck_a, refrat_planck_b 107 108 !dir$ assume_aligned jp:64 109 !dir$ assume_aligned jt:64 110 !dir$ assume_aligned rat_h2oco2:64 111 !dir$ assume_aligned colco2:64 112 !dir$ assume_aligned colh2o:64 113 !dir$ assume_aligned fac0:64 114 !dir$ assume_aligned fac1:64 115 !dir$ assume_aligned taug:64 116 117 !dir$ assume_aligned p:64 118 !dir$ assume_aligned p4:64 119 !dir$ assume_aligned specmult:64 120 !dir$ assume_aligned speccomb:64 121 !dir$ assume_aligned specparm:64 122 !dir$ assume_aligned specmult_planck:64 123 !dir$ assume_aligned speccomb_planck:64 124 !dir$ assume_aligned specparm_planck:64 125 !dir$ assume_aligned indself:64 126 !dir$ assume_aligned indfor:64 127 !dir$ assume_aligned fs:64 128 !dir$ assume_aligned tau_major:64 129 130 !dir$ assume_aligned js:64 131 !dir$ assume_aligned ind0:64 132 !dir$ assume_aligned ind00:64 133 !dir$ assume_aligned ind01:64 134 !dir$ assume_aligned ind02:64 135 136 !dir$ assume_aligned caseTypeOperations:64 137 !dir$ assume_aligned caseType:64 138 139 ! Initialize Case type operations 140 !================================= 141 142 caseTypeOperations(1:3,1) = (/0, 1, 2/) 143 caseTypeOperations(1:3,2) = (/1, 0,-1/) 144 caseTypeOperations(1:3,3) = (/0, 1, 1/) 145 146 ! Minor gas mapping levels: 147 ! lower - n2o, p = 706.272 mbar, t = 278.94 k 148 ! upper - n2o, p = 95.58 mbar, t = 215.7 k 149 150 ! P = 212.725 mb 151 refrat_planck_a = chi_mls(1,11)/chi_mls(2,11) 152 153 ! P = 95.58 mb 154 refrat_planck_b = chi_mls(3,13)/chi_mls(2,13) 155 156 157 ! Lower atmosphere loop 158 ! ===================== 159 160 DO lay = 1,laytrop ! loop over layers 161 162 ! Compute tau_major term 163 ! ====================== 164 165 DO i=0,1 ! loop over specparm types 166 167 ! This loop should vectorize 168 ! ============================= 169 !dir$ SIMD 170 DO icol=1,ncol ! Vectorizes with dir - 14.0.2 171 speccomb(icol) = colh2o(icol,lay) + rat_h2oco2(icol,i,lay)*colco2(icol,lay) 172 specparm(icol) = colh2o(icol,lay)/speccomb(icol) 173 IF (specparm(icol) .GE. oneminus) specparm(icol) = oneminus 174 specmult(icol) = 8._wp*(specparm(icol)) 175 js(icol) = 1 + INT(specmult(icol)) 176 fs(icol) = MOD(specmult(icol),1.0_wp) 177 END DO 178 179 ! The only conditional loop 180 ! ========================= 181 182 DO icol=1,ncol ! Vectorizes as is 14.0.2 183 IF (specparm(icol) .LT. 0.125_wp) THEN 184 caseType(icol)=1 185 p(icol) = fs(icol) - 1.0_wp 186 p2 = p(icol)*p(icol) 187 p4(icol) = p2*p2 188 ELSE IF (specparm(icol) .GT. 0.875_wp) THEN 189 caseType(icol)=2 190 p(icol) = -fs(icol) 191 p2 = p(icol)*p(icol) 192 p4(icol) = p2*p2 193 ELSE 194 caseType(icol)=3 195 ! SIMD way of writing fk0= 1-fs and fk1 = fs, fk2=zero 196 ! =========================================================== 197 198 p4(icol) = 1.0_wp - fs(icol) 199 p(icol) = -p4(icol) ! complicated way of getting fk2 = zero for ELSE case 200 ENDIF 201 END DO 202 203 ! Vector/SIMD index loop calculation 204 ! ================================== 205 206 !dir$ SIMD 207 DO icol=1,ncol ! Vectorizes with dir 14.0.2 208 ind0(icol) = ((jp(icol,lay)-(1*MOD(i+1,2)))*5+(jt(icol,i,lay)-1))*nspa(4) +js(icol) 209 ind00(icol) = ind0(icol) + caseTypeOperations(1,caseType(icol)) 210 ind01(icol) = ind0(icol) + caseTypeOperations(2,caseType(icol)) 211 ind02(icol) = ind0(icol) + caseTypeOperations(3,caseType(icol)) 212 END DO 213 214 ! What we've been aiming for a nice flop intensive 215 ! SIMD/vectorizable loop! 216 ! 17 flops 217 ! 218 ! Albeit at the cost of a couple extra flops for the fk2 term 219 ! and a repeated lookup table access for the fk2 term in the 220 ! the ELSE case when specparm or specparm1 is (> .125 && < .875) 221 ! =============================================================== 222 223 !dir$ SIMD 224 DO icol=1,ncol ! Vectorizes with dir 14.0.2 225 226 fk0 = p4(icol) 227 fk1 = 1.0_wp - p(icol) - 2.0_wp*p4(icol) 228 fk2 = p(icol) + p4(icol) 229 tau_major(icol,i) = speccomb(icol) * ( & 230 fac0(icol,i,lay)*(fk0*absa(ind00(icol),ig) + & 231 fk1*absa(ind01(icol),ig) + & 232 fk2*absa(ind02(icol),ig)) + & 233 fac1(icol,i,lay)*(fk0*absa(ind00(icol)+9,ig) + & 234 fk1*absa(ind01(icol)+9,ig) + & 235 fk2*absa(ind02(icol)+9,ig))) 236 END DO 237 238 END DO ! end loop over specparm types for tau_major calculation 239 240 ! Compute taufor and tauself terms: 241 ! Note the use of 1D bilinear interpolation of selfref and forref 242 ! lookup table values 243 ! =================================================================================== 244 245 !dir$ SIMD 246 DO icol=1,ncol ! Vectorizes with dir 14.0.2 247 tauself(icol) = selffac(icol,lay)*(selfref(indself(icol,lay),ig) +& 248 selffrac(icol,lay)*(selfref(indself(icol,lay)+1,ig)- selfref(indself(icol,lay),ig))) 249 taufor(icol) = forfac(icol,lay)*( forref(indfor(icol,lay),ig) +& 250 forfrac(icol,lay)*( forref(indfor(icol,lay)+1,ig) -forref(indfor(icol,lay),ig))) 251 END DO 252 253 ! Compute taug, one of two terms returned by the lower atmosphere 254 ! kernel (the other is fracs) 255 ! This loop could be parallelized over specparm types (i) but might 256 ! produce 257 ! different results for different thread counts 258 ! =========================================================================================== 259 260 !dir$ SIMD ! DOES NOT VECTORIZE even with SIMD dir 14.0.2 261 DO icol=1,ncol 262 taug(icol,lay) = tau_major(icol,0) + tau_major(icol,1) +tauself(icol) + taufor(icol) 263 END DO 264 265 !dir$ SIMD ! vectorizes with dir 14.0.2 266 DO icol=1,ncol 267 speccomb_planck(icol) = colh2o(icol,lay)+refrat_planck_a*colco2(icol,lay) 268 specparm_planck(icol) = colh2o(icol,lay)/speccomb_planck(icol) 269 END DO 270 271 DO icol=1,ncol ! vectorizes as is 14.0.2 272 IF (specparm_planck(icol) .GE. oneminus) specparm_planck(icol)=oneminus 273 END DO 274 275 !dir$ SIMD 276 DO icol=1,ncol !vectorizes with dir 14.0.2 277 specmult_planck(icol) = 8.0_wp*specparm_planck(icol) 278 jpl(icol)= 1 + INT(specmult_planck(icol)) 279 fpl(icol) = MOD(specmult_planck(icol),1.0_wp) 280 fracs(icol,lay) = fracrefa(ig,jpl(icol)) + fpl(icol)*(fracrefa(ig,jpl(icol)+1)-fracrefa(ig,jpl(icol))) 281 END DO 282 END DO ! end lower atmosphere loop 283 END SUBROUTINE taumol04_lwr 284 285 286 SUBROUTINE taumol04_upr(ncol, laytrop, nlayers, & 287 rat_o3co2, colco2, colo3, coldry, & 288 fac0, fac1, minorfrac, & 289 forfac,forfrac, & 290 jp, jt, ig, & 291 indfor, & 292 taug, fracs) 293 294 use mo_kind, only : wp 295 USE mo_lrtm_setup, ONLY: nspa 296 USE mo_lrtm_setup, ONLY: nspb 297 USE rrlw_planck, ONLY: chi_mls 298 USE rrlw_kg04, ONLY: selfref 299 USE rrlw_kg04, ONLY: forref 300 USE rrlw_kg04, ONLY: absb 301 USE rrlw_kg04, ONLY: fracrefb 302 303 IMPLICIT NONE 304 305 real(kind=wp), PARAMETER :: oneminus = 1.0_wp - 1.0e-06_wp 306 307 integer, intent(in) :: ncol ! number of simd columns 308 integer, intent(in) :: laytrop ! number of layers for lower atmosphere kernel 309 integer, intent(in) :: nlayers ! total number of layers 310 real(kind=wp), intent(in), dimension(ncol,0:1,nlayers) :: rat_o3co2,fac0,fac1 ! not sure of ncol depend 311 real(kind=wp), intent(in), dimension(ncol,nlayers) :: colco2,colo3,coldry ! these appear to be gas concentrations 312 real(kind=wp), intent(in), dimension(ncol,nlayers) :: forfac,forfrac ! not sure of ncol depend 313 real(kind=wp), intent(in), dimension(ncol,nlayers) :: minorfrac ! not sure of ncol depend 314 315 ! Look up tables and related lookup indices 316 ! I assume all lookup indices depend on 3D position 317 ! ================================================= 318 319 integer, intent(in) :: jp(ncol,nlayers) ! I assume jp depends on ncol 320 integer, intent(in) :: jt(ncol,0:1,nlayers) ! likewise for jt 321 integer, intent(in) :: ig ! ig indexes into lookup tables 322 integer, intent(in) :: indfor(ncol,nlayers) ! for index array 323 real(kind=wp), intent(out), dimension(ncol,nlayers) :: taug ! kernel result 324 real(kind=wp), intent(out), dimension(ncol,nlayers) :: fracs ! kernel result 325 326 ! Local variable 327 ! ============== 328 329 integer :: lay ! layer index 330 integer :: i ! specparm types index 331 integer :: icol ! column index 332 333 ! vector temporaries 334 ! ==================== 335 336 real(kind=wp), dimension(ncol) :: fs 337 real(kind=wp), dimension(ncol) :: fpl 338 real(kind=wp), dimension(ncol) :: specmult, speccomb, specparm 339 real(kind=wp), dimension(ncol) :: specmult_planck, speccomb_planck,specparm_planck 340 real(kind=wp), dimension(ncol,0:1) :: tau_major 341 real(kind=wp), dimension(ncol) :: taufor,tauself 342 integer, dimension(ncol) :: js, ind0, jpl 343 344 ! Register temporaries 345 ! ==================== 346 347 real(kind=wp) :: p2,fk0,fk1,fk2 348 real(kind=wp) :: refrat_planck_a, refrat_planck_b 349 REAL(KIND=wp), dimension(ngc(4)) :: stratcorrect = (/ 1., 1., 1., 1.,1., 1., 1., .92, .88, 1.07, 1.1, & 350 .99, .88, .943 /) 351 !dir$ assume_aligned jp:64 352 !dir$ assume_aligned jt:64 353 !dir$ assume_aligned rat_o3co2:64 354 !dir$ assume_aligned colco2:64 355 !dir$ assume_aligned colo3:64 356 !dir$ assume_aligned fac0:64 357 !dir$ assume_aligned fac1:64 358 !dir$ assume_aligned taug:64 359 360 !dir$ assume_aligned specmult:64 361 !dir$ assume_aligned speccomb:64 362 !dir$ assume_aligned specparm:64 363 !dir$ assume_aligned specmult_planck:64 364 !dir$ assume_aligned speccomb_planck:64 365 !dir$ assume_aligned specparm_planck:64 366 !dir$ assume_aligned fs:64 367 !dir$ assume_aligned tau_major:64 368 369 !dir$ assume_aligned js:64 370 !dir$ assume_aligned ind0:64 371 !dir$ assume_aligned jpl:64 372 !dir$ assume_aligned fpl:64 373 374 375 ! Upper atmosphere loop 376 ! ======================== 377 refrat_planck_b = chi_mls(3,13)/chi_mls(2,13) 378 DO lay = laytrop+1, nlayers 379 380 DO i=0,1 ! loop over specparm types 381 382 ! This loop should vectorize 383 ! ============================= 384 385 !dir$ SIMD 386 DO icol=1,ncol ! Vectorizes with dir 14.0.2 387 speccomb(icol) = colo3(icol,lay) + rat_o3co2(icol,i,lay)*colco2(icol,lay) 388 specparm(icol) = colo3(icol,lay)/speccomb(icol) 389 IF (specparm(icol) .ge. oneminus) specparm(icol) = oneminus 390 specmult(icol) = 4.0_wp*(specparm(icol)) 391 js(icol) = 1 + INT(specmult(icol)) 392 fs(icol) = MOD(specmult(icol),1.0_wp) 393 ind0(icol) = ((jp(icol,lay)-13+i)*5+(jt(icol,i,lay)-1))*nspb(4) +js(icol) 394 END DO 395 396 !dir$ SIMD 397 DO icol=1,ncol ! Vectorizes with dir 14.0.2 398 tau_major(icol,i) = speccomb(icol) * & 399 ((1.0_wp - fs(icol))*fac0(icol,i,lay)*absb(ind0(icol) ,ig) + & 400 fs(icol) *fac0(icol,i,lay)*absb(ind0(icol)+1,ig) + & 401 (1.0_wp - fs(icol))*fac1(icol,i,lay)*absb(ind0(icol)+5,ig) + & 402 fs(icol) *fac1(icol,i,lay)*absb(ind0(icol)+6,ig)) 403 END DO 404 405 END DO ! end loop over specparm types for tau_major calculation 406 407 ! Compute taufor terms 408 ! Note the use of 1D bilinear interpolation of selfref and forref lookup 409 ! table values 410 ! =================================================================================== 411 412 !dir$ SIMD 413 DO icol=1,ncol ! loop vectorizes with directive 14.0.2 414 taug(icol,lay) = (tau_major(icol,0) + tau_major(icol,1) ) * stratcorrect(ig) 415 END DO 416 417 !dir$ SIMD 418 DO icol=1,ncol ! loop vectorizes with directive 14.0.2 419 speccomb_planck(icol) = colo3(icol,lay)+refrat_planck_b*colco2(icol,lay) 420 specparm_planck(icol) = colo3(icol,lay)/speccomb_planck(icol) 421 END DO 422 423 !dir$ SIMD 424 DO icol=1,ncol ! loop vectorizes with directive 14.0.2 425 IF (specparm_planck(icol) .GE. oneminus) specparm_planck(icol)=oneminus 426 specmult_planck(icol) = 4.0_wp*specparm_planck(icol) 427 jpl(icol)= 1 + INT(specmult_planck(icol)) 428 fpl(icol) = MOD(specmult_planck(icol),1.0_wp) 429 fracs(icol,lay) = fracrefb(ig,jpl(icol)) + fpl(icol)*(fracrefb(ig,jpl(icol)+1)-fracrefb(ig,jpl(icol))) 430 END DO 431 END DO ! nlayers loop 432 433 END SUBROUTINE taumol04_upr 434 435END MODULE mo_taumol04 436