1! ======================================================================================================
2! This kernel represents a distillation of *part* of
3! the taumol04 calculation in the gas optics part of the PSRAD
4! atmospheric
5! radiation code.
6!
7! It is meant to show conceptually how one might "SIMD-ize" swaths of
8! the taumol04 code related to calculating the
9! taug term, so that the impact of the conditional expression on
10! specparm could be reduced and at least partial vectorization
11! across columns could be achieved.
12!
13! I consider it at this point to be "compiling pseudo-code".
14!
15! By this I mean that the code as written compiles under ifort, but has
16! not been tested
17! for correctness, nor I have written a driver routine for it. It does
18! not contain everything
19! that is going on in the taug parent taumol04 code, but I don't claim
20! to actually completely
21! understand the physical meaning of all or even most of the inputs
22! required to make it run.
23!
24! It has been written to vectorize, but apparently does not actually do
25! that
26! under the ifort V13 compiler with the -xHost -O3 level of
27! optimization, even with !dir$ assume_aligned directives.
28! I hypothesize that the compiler is baulking to do so for the indirect
29! addressed calls into the absa
30! look-up table, either that or 4 byte integers may be causing alignment
31! issues relative to 8 byte reals. Anyway,
32! it seems to complain about the key loop being too complex.
33! ======================================================================================================
34MODULE mo_taumol04
35    USE mo_kind, only:wp
36    USE mo_lrtm_setup, ONLY: nspa
37    USE mo_lrtm_setup, ONLY: nspb
38    USE mo_lrtm_setup, ONLY: ngc
39    USE rrlw_planck, ONLY: chi_mls
40    USE rrlw_kg04, ONLY: selfref
41    USE rrlw_kg04, ONLY: forref
42    USE rrlw_kg04, ONLY: absa
43    USE rrlw_kg04, ONLY: fracrefa
44    USE rrlw_kg04, ONLY: absb
45    USE rrlw_kg04, ONLY: fracrefb
46    IMPLICIT NONE
47    PRIVATE
48    PUBLIC taumol04_lwr,taumol04_upr
49    CONTAINS
50    SUBROUTINE taumol04_lwr(ncol, laytrop, nlayers,         &
51                rat_h2oco2, colco2, colh2o, coldry, &
52                fac0, fac1, minorfrac,                      &
53                selffac,selffrac,forfac,forfrac,            &
54                jp, jt, ig, indself, &
55                indfor, &
56                taug, fracs)
57        IMPLICIT NONE
58
59        real(kind=wp), PARAMETER :: oneminus = 1.0_wp - 1.0e-06_wp
60        integer, intent(in) :: ncol                  ! number of simd columns
61        integer, intent(in) :: laytrop               ! number of layers forwer atmosphere kernel
62        integer, intent(in) :: nlayers               ! total number of layers
63        real(kind=wp), intent(in), dimension(ncol,0:1,nlayers) :: rat_h2oco2,fac0,fac1    ! not sure of ncol depend
64        real(kind=wp), intent(in), dimension(ncol,nlayers) :: colco2,colh2o,coldry ! these appear to be gas concentrations
65
66        real(kind=wp), intent(in), dimension(ncol,nlayers) :: selffac,selffrac            ! not sure of ncol depend
67        real(kind=wp), intent(in), dimension(ncol,nlayers) :: forfac,forfrac              ! not sure of ncol depend
68        real(kind=wp), intent(in), dimension(ncol,nlayers) :: minorfrac                   ! not sure of ncol depend
69
70        ! Look up tables and related lookup indices
71        ! I assume all lookup indices depend on 3D position
72        ! =================================================
73
74        integer, intent(in) :: jp(ncol,nlayers)      ! I assume jp depends on ncol
75        integer, intent(in) :: jt(ncol,0:1,nlayers)  ! likewise for jt
76        integer, intent(in) :: ig                    ! ig indexes into lookup tables
77        integer, intent(in) :: indself(ncol,nlayers) ! self index array
78        integer, intent(in) :: indfor(ncol,nlayers)  ! for index array
79        real(kind=wp), intent(out), dimension(ncol,nlayers) :: taug  ! kernel result
80        real(kind=wp), intent(out), dimension(ncol,nlayers) :: fracs ! kernel result
81
82        ! Local variable
83        ! ==============
84
85        integer :: lay   ! layer index
86        integer :: i     ! specparm types index
87        integer :: icol  ! column index
88
89        ! vector temporaries
90        ! ====================
91
92        integer, dimension(1:3,1:3) :: caseTypeOperations
93        integer, dimension(ncol)    :: caseType
94        real(kind=wp), dimension(ncol) :: p, p4, fs
95        real(kind=wp), dimension(ncol) :: fpl
96        real(kind=wp), dimension(ncol) :: specmult, speccomb, specparm
97        real(kind=wp), dimension(ncol) :: specmult_planck, speccomb_planck,specparm_planck
98        real(kind=wp), dimension(ncol,0:1) :: tau_major
99        real(kind=wp), dimension(ncol) :: taufor,tauself
100        integer, dimension(ncol) :: js, ind0, ind00, ind01, ind02, jpl
101
102        ! Register temporaries
103        ! ====================
104
105        real(kind=wp) :: p2,fk0,fk1,fk2
106        real(kind=wp) :: refrat_planck_a, refrat_planck_b
107
108        !dir$ assume_aligned jp:64
109        !dir$ assume_aligned jt:64
110        !dir$ assume_aligned rat_h2oco2:64
111        !dir$ assume_aligned colco2:64
112        !dir$ assume_aligned colh2o:64
113        !dir$ assume_aligned fac0:64
114        !dir$ assume_aligned fac1:64
115        !dir$ assume_aligned taug:64
116
117        !dir$ assume_aligned p:64
118        !dir$ assume_aligned p4:64
119        !dir$ assume_aligned specmult:64
120        !dir$ assume_aligned speccomb:64
121        !dir$ assume_aligned specparm:64
122        !dir$ assume_aligned specmult_planck:64
123        !dir$ assume_aligned speccomb_planck:64
124        !dir$ assume_aligned specparm_planck:64
125        !dir$ assume_aligned indself:64
126        !dir$ assume_aligned indfor:64
127        !dir$ assume_aligned fs:64
128        !dir$ assume_aligned tau_major:64
129
130        !dir$ assume_aligned js:64
131        !dir$ assume_aligned ind0:64
132        !dir$ assume_aligned ind00:64
133        !dir$ assume_aligned ind01:64
134        !dir$ assume_aligned ind02:64
135
136        !dir$ assume_aligned caseTypeOperations:64
137        !dir$ assume_aligned caseType:64
138
139        ! Initialize Case type operations
140        !=================================
141
142        caseTypeOperations(1:3,1) = (/0, 1, 2/)
143        caseTypeOperations(1:3,2) = (/1, 0,-1/)
144        caseTypeOperations(1:3,3) = (/0, 1, 1/)
145
146        ! Minor gas mapping levels:
147        !     lower - n2o, p = 706.272 mbar, t = 278.94 k
148        !     upper - n2o, p = 95.58 mbar, t = 215.7 k
149
150        !  P = 212.725 mb
151        refrat_planck_a = chi_mls(1,11)/chi_mls(2,11)
152
153        !  P = 95.58 mb
154        refrat_planck_b = chi_mls(3,13)/chi_mls(2,13)
155
156
157        ! Lower atmosphere loop
158        ! =====================
159
160        DO lay = 1,laytrop  ! loop over layers
161
162        ! Compute tau_major term
163        ! ======================
164
165            DO i=0,1         ! loop over specparm types
166
167                ! This loop should vectorize
168                ! =============================
169                !dir$ SIMD
170                DO icol=1,ncol  ! Vectorizes with dir - 14.0.2
171                    speccomb(icol) = colh2o(icol,lay) + rat_h2oco2(icol,i,lay)*colco2(icol,lay)
172                    specparm(icol) = colh2o(icol,lay)/speccomb(icol)
173                    IF (specparm(icol) .GE. oneminus) specparm(icol) = oneminus
174                    specmult(icol) = 8._wp*(specparm(icol))
175                    js(icol) = 1 + INT(specmult(icol))
176                    fs(icol) = MOD(specmult(icol),1.0_wp)
177                END DO
178
179                ! The only conditional loop
180                ! =========================
181
182                DO icol=1,ncol  ! Vectorizes as is 14.0.2
183                    IF (specparm(icol) .LT. 0.125_wp) THEN
184                        caseType(icol)=1
185                        p(icol)  = fs(icol) - 1.0_wp
186                        p2 = p(icol)*p(icol)
187                        p4(icol) = p2*p2
188                    ELSE IF (specparm(icol) .GT. 0.875_wp) THEN
189                        caseType(icol)=2
190                        p(icol)  = -fs(icol)
191                        p2 = p(icol)*p(icol)
192                        p4(icol) = p2*p2
193                    ELSE
194                        caseType(icol)=3
195                        ! SIMD way of writing fk0= 1-fs and fk1 = fs, fk2=zero
196                        ! ===========================================================
197
198                        p4(icol) = 1.0_wp - fs(icol)
199                        p(icol) = -p4(icol)          ! complicated way of getting fk2 = zero for ELSE case
200                    ENDIF
201                END DO
202
203                ! Vector/SIMD index loop calculation
204                ! ==================================
205
206                !dir$ SIMD
207                DO icol=1,ncol  ! Vectorizes with dir 14.0.2
208                    ind0(icol) = ((jp(icol,lay)-(1*MOD(i+1,2)))*5+(jt(icol,i,lay)-1))*nspa(4) +js(icol)
209                    ind00(icol) = ind0(icol) + caseTypeOperations(1,caseType(icol))
210                    ind01(icol) = ind0(icol) + caseTypeOperations(2,caseType(icol))
211                    ind02(icol) = ind0(icol) + caseTypeOperations(3,caseType(icol))
212                END DO
213
214                ! What we've been aiming for a nice flop intensive
215                ! SIMD/vectorizable loop!
216                ! 17 flops
217                !
218                ! Albeit at the cost of a couple extra flops for the fk2 term
219                ! and a repeated lookup table access for the fk2 term in the
220                ! the ELSE case when specparm or specparm1 is  (> .125 && < .875)
221                ! ===============================================================
222
223                !dir$ SIMD
224                DO icol=1,ncol ! Vectorizes with dir 14.0.2
225
226                    fk0 = p4(icol)
227                    fk1 = 1.0_wp - p(icol) - 2.0_wp*p4(icol)
228                    fk2 = p(icol) + p4(icol)
229                    tau_major(icol,i) = speccomb(icol) * ( &
230                    fac0(icol,i,lay)*(fk0*absa(ind00(icol),ig)  + &
231                    fk1*absa(ind01(icol),ig)  + &
232                    fk2*absa(ind02(icol),ig)) + &
233                    fac1(icol,i,lay)*(fk0*absa(ind00(icol)+9,ig)  + &
234                    fk1*absa(ind01(icol)+9,ig)  + &
235                    fk2*absa(ind02(icol)+9,ig)))
236                END DO
237
238            END DO ! end loop over specparm types for tau_major calculation
239
240            ! Compute taufor and tauself terms:
241            ! Note the use of 1D bilinear interpolation of selfref and forref
242            ! lookup table values
243            ! ===================================================================================
244
245            !dir$ SIMD
246            DO icol=1,ncol  ! Vectorizes with dir 14.0.2
247                tauself(icol) = selffac(icol,lay)*(selfref(indself(icol,lay),ig) +&
248                selffrac(icol,lay)*(selfref(indself(icol,lay)+1,ig)- selfref(indself(icol,lay),ig)))
249                taufor(icol) =  forfac(icol,lay)*( forref(indfor(icol,lay),ig) +&
250                forfrac(icol,lay)*( forref(indfor(icol,lay)+1,ig) -forref(indfor(icol,lay),ig)))
251            END DO
252
253            ! Compute taug, one of two terms returned by the lower atmosphere
254            ! kernel (the other is fracs)
255            ! This loop could be parallelized over specparm types (i) but might
256            ! produce
257            ! different results for different thread counts
258            ! ===========================================================================================
259
260            !dir$ SIMD  ! DOES NOT VECTORIZE even with SIMD dir 14.0.2
261            DO icol=1,ncol
262                taug(icol,lay) = tau_major(icol,0) + tau_major(icol,1) +tauself(icol) + taufor(icol)
263            END DO
264
265            !dir$ SIMD ! vectorizes with dir 14.0.2
266            DO icol=1,ncol
267                speccomb_planck(icol) = colh2o(icol,lay)+refrat_planck_a*colco2(icol,lay)
268                specparm_planck(icol) = colh2o(icol,lay)/speccomb_planck(icol)
269            END DO
270
271            DO icol=1,ncol ! vectorizes as is 14.0.2
272                IF (specparm_planck(icol) .GE. oneminus) specparm_planck(icol)=oneminus
273            END DO
274
275            !dir$ SIMD
276            DO icol=1,ncol !vectorizes with dir 14.0.2
277                specmult_planck(icol) = 8.0_wp*specparm_planck(icol)
278                jpl(icol)= 1 + INT(specmult_planck(icol))
279                fpl(icol) = MOD(specmult_planck(icol),1.0_wp)
280                fracs(icol,lay) = fracrefa(ig,jpl(icol)) + fpl(icol)*(fracrefa(ig,jpl(icol)+1)-fracrefa(ig,jpl(icol)))
281            END DO
282        END DO ! end lower atmosphere loop
283    END SUBROUTINE taumol04_lwr
284
285
286    SUBROUTINE taumol04_upr(ncol, laytrop, nlayers,                     &
287                rat_o3co2, colco2, colo3, coldry, &
288                fac0, fac1, minorfrac, &
289                forfac,forfrac,        &
290                jp, jt, ig,        &
291                indfor, &
292                taug, fracs)
293
294        use mo_kind, only : wp
295        USE mo_lrtm_setup, ONLY: nspa
296        USE mo_lrtm_setup, ONLY: nspb
297        USE rrlw_planck, ONLY: chi_mls
298        USE rrlw_kg04, ONLY: selfref
299        USE rrlw_kg04, ONLY: forref
300        USE rrlw_kg04, ONLY: absb
301        USE rrlw_kg04, ONLY: fracrefb
302
303        IMPLICIT NONE
304
305        real(kind=wp), PARAMETER :: oneminus = 1.0_wp - 1.0e-06_wp
306
307        integer, intent(in) :: ncol                  ! number of simd columns
308        integer, intent(in) :: laytrop               ! number of layers for lower atmosphere kernel
309        integer, intent(in) :: nlayers               ! total number of layers
310        real(kind=wp), intent(in), dimension(ncol,0:1,nlayers) :: rat_o3co2,fac0,fac1    ! not sure of ncol depend
311        real(kind=wp), intent(in), dimension(ncol,nlayers) :: colco2,colo3,coldry ! these appear to be gas concentrations
312        real(kind=wp), intent(in), dimension(ncol,nlayers) :: forfac,forfrac ! not sure of ncol depend
313        real(kind=wp), intent(in), dimension(ncol,nlayers) :: minorfrac      ! not sure of ncol depend
314
315        ! Look up tables and related lookup indices
316        ! I assume all lookup indices depend on 3D position
317        ! =================================================
318
319        integer, intent(in) :: jp(ncol,nlayers)      ! I assume jp depends on ncol
320        integer, intent(in) :: jt(ncol,0:1,nlayers)  ! likewise for jt
321        integer, intent(in) :: ig                    ! ig indexes into lookup tables
322        integer, intent(in) :: indfor(ncol,nlayers)  ! for index array
323        real(kind=wp), intent(out), dimension(ncol,nlayers) :: taug  ! kernel result
324        real(kind=wp), intent(out), dimension(ncol,nlayers) :: fracs ! kernel result
325
326        ! Local variable
327        ! ==============
328
329        integer :: lay   ! layer index
330        integer :: i     ! specparm types index
331        integer :: icol  ! column index
332
333        ! vector temporaries
334        ! ====================
335
336        real(kind=wp), dimension(ncol) :: fs
337        real(kind=wp), dimension(ncol) :: fpl
338        real(kind=wp), dimension(ncol) :: specmult, speccomb, specparm
339        real(kind=wp), dimension(ncol) :: specmult_planck, speccomb_planck,specparm_planck
340        real(kind=wp), dimension(ncol,0:1) :: tau_major
341        real(kind=wp), dimension(ncol) :: taufor,tauself
342        integer, dimension(ncol) :: js, ind0, jpl
343
344        ! Register temporaries
345        ! ====================
346
347        real(kind=wp) :: p2,fk0,fk1,fk2
348        real(kind=wp) :: refrat_planck_a, refrat_planck_b
349        REAL(KIND=wp), dimension(ngc(4)) :: stratcorrect = (/ 1., 1., 1., 1.,1., 1., 1., .92, .88, 1.07, 1.1, &
350                                                        .99, .88, .943 /)
351        !dir$ assume_aligned jp:64
352        !dir$ assume_aligned jt:64
353        !dir$ assume_aligned rat_o3co2:64
354        !dir$ assume_aligned colco2:64
355        !dir$ assume_aligned colo3:64
356        !dir$ assume_aligned fac0:64
357        !dir$ assume_aligned fac1:64
358        !dir$ assume_aligned taug:64
359
360        !dir$ assume_aligned specmult:64
361        !dir$ assume_aligned speccomb:64
362        !dir$ assume_aligned specparm:64
363        !dir$ assume_aligned specmult_planck:64
364        !dir$ assume_aligned speccomb_planck:64
365        !dir$ assume_aligned specparm_planck:64
366        !dir$ assume_aligned fs:64
367        !dir$ assume_aligned tau_major:64
368
369        !dir$ assume_aligned js:64
370        !dir$ assume_aligned ind0:64
371        !dir$ assume_aligned jpl:64
372        !dir$ assume_aligned fpl:64
373
374
375        ! Upper atmosphere loop
376        ! ========================
377        refrat_planck_b = chi_mls(3,13)/chi_mls(2,13)
378        DO lay = laytrop+1, nlayers
379
380            DO i=0,1         ! loop over specparm types
381
382            ! This loop should vectorize
383            ! =============================
384
385            !dir$ SIMD
386                DO icol=1,ncol ! Vectorizes with dir 14.0.2
387                    speccomb(icol) = colo3(icol,lay) + rat_o3co2(icol,i,lay)*colco2(icol,lay)
388                    specparm(icol) = colo3(icol,lay)/speccomb(icol)
389                    IF (specparm(icol) .ge. oneminus) specparm(icol) = oneminus
390                    specmult(icol) = 4.0_wp*(specparm(icol))
391                    js(icol) = 1 + INT(specmult(icol))
392                    fs(icol) = MOD(specmult(icol),1.0_wp)
393                    ind0(icol) = ((jp(icol,lay)-13+i)*5+(jt(icol,i,lay)-1))*nspb(4) +js(icol)
394                END DO
395
396                !dir$ SIMD
397                DO icol=1,ncol ! Vectorizes with dir 14.0.2
398                    tau_major(icol,i) = speccomb(icol) * &
399                    ((1.0_wp - fs(icol))*fac0(icol,i,lay)*absb(ind0(icol)  ,ig) + &
400                    fs(icol) *fac0(icol,i,lay)*absb(ind0(icol)+1,ig) + &
401                    (1.0_wp - fs(icol))*fac1(icol,i,lay)*absb(ind0(icol)+5,ig) + &
402                    fs(icol) *fac1(icol,i,lay)*absb(ind0(icol)+6,ig))
403                END DO
404
405            END DO ! end loop over specparm types for tau_major calculation
406
407            ! Compute taufor terms
408            ! Note the use of 1D bilinear interpolation of selfref and forref lookup
409            ! table values
410            ! ===================================================================================
411
412            !dir$ SIMD
413            DO icol=1,ncol ! loop vectorizes with directive 14.0.2
414                taug(icol,lay) =  (tau_major(icol,0) + tau_major(icol,1) ) * stratcorrect(ig)
415            END DO
416
417            !dir$ SIMD
418            DO icol=1,ncol ! loop vectorizes with directive 14.0.2
419                speccomb_planck(icol) = colo3(icol,lay)+refrat_planck_b*colco2(icol,lay)
420                specparm_planck(icol) = colo3(icol,lay)/speccomb_planck(icol)
421            END DO
422
423            !dir$ SIMD
424            DO icol=1,ncol  ! loop vectorizes with directive 14.0.2
425                IF (specparm_planck(icol) .GE. oneminus) specparm_planck(icol)=oneminus
426                specmult_planck(icol) = 4.0_wp*specparm_planck(icol)
427                jpl(icol)= 1 + INT(specmult_planck(icol))
428                fpl(icol) = MOD(specmult_planck(icol),1.0_wp)
429                fracs(icol,lay) =  fracrefb(ig,jpl(icol)) + fpl(icol)*(fracrefb(ig,jpl(icol)+1)-fracrefb(ig,jpl(icol)))
430            END DO
431        END DO  ! nlayers loop
432
433    END SUBROUTINE taumol04_upr
434
435END MODULE mo_taumol04
436